Spaces:
Sleeping
Sleeping
| """ | |
| Facial Comparison — HuggingFace Space | |
| ====================================== | |
| Detection : RetinaFace (public, well-known detector) | |
| Alignment : 5-point similarity transform → 112×112 canonical crop | |
| Embedding : facial_comparison.pt (private TorchScript model via HF secrets) | |
| Similarity : Augmented cosine (3×3 pairs) + sigmoid confidence | |
| """ | |
| import os | |
| import io | |
| import base64 | |
| import logging | |
| import numpy as np | |
| import torch | |
| import torch.nn.functional as F | |
| import cv2 | |
| from PIL import Image, ImageOps | |
| from typing import List, Tuple, Optional | |
| import gradio as gr | |
# ── Logging ──────────────────────────────────────────────────────────────────
# Root logging is configured once at import time; every function in this module
# reports through the shared "facial-comparison" logger.
_LOG_FORMAT = "%(levelname)s | %(name)s | %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("facial-comparison")
# ── Config ───────────────────────────────────────────────────────────────────
# Every setting can be overridden through an environment variable of the name
# shown; the literal is the fallback used when the variable is unset.
def _float_setting(name, fallback):
    """Read environment variable *name* as a float, defaulting to *fallback*."""
    return float(os.environ.get(name, fallback))


# Where the TorchScript weights live locally, and where to fetch them otherwise.
MODEL_PATH = os.environ.get("MODEL_PATH", "models/facial_comparison.pt")
HF_MODEL_REPO = os.environ.get("HF_MODEL_REPO", "")
HF_MODEL_FILE = os.environ.get("HF_MODEL_FILE", "facial_comparison.pt")

# Cosine score at or above which two faces are reported as a match.
THRESHOLD = _float_setting("THRESHOLD", 0.38)
# Second-largest / largest face-area ratio at which an image is rejected.
FACE_RATIO_THRESH = _float_setting("FACE_RATIO_THRESHOLD", 0.15)
# Slope of the sigmoid that maps a cosine score to a confidence percentage.
STEEPNESS = _float_setting("STEEPNESS", 12.0)
# Upper cap applied to the reported confidence percentage.
MAX_CONFIDENCE = _float_setting("MAX_CONFIDENCE", 99.9)
# ── Canonical 5-point template (112×112, ArcFace standard) ───────────────────
# Target (x, y) positions that detected landmarks are aligned to before
# embedding. Row order follows the landmark order used by the detector wrapper:
# two eyes, nose, two mouth corners.
_ARCFACE_TEMPLATE_POINTS = (
    (38.2946, 51.6963),
    (73.5318, 51.5014),
    (56.0252, 71.7366),
    (41.5493, 92.3655),
    (70.7299, 92.2041),
)
ARCFACE_DST = np.asarray(_ARCFACE_TEMPLATE_POINTS, dtype=np.float32)
# ── Model loading ────────────────────────────────────────────────────────────
def _resolve_model_path() -> str:
    """
    Locate the embedding weights and return a filesystem path to them.

    A file already present at MODEL_PATH wins. Otherwise, when HF_MODEL_REPO
    is set, the file HF_MODEL_FILE is downloaded from that Hub repo using the
    HF_TOKEN environment variable as credential.

    Raises FileNotFoundError when neither source is available.
    """
    if not os.path.exists(MODEL_PATH):
        if not HF_MODEL_REPO:
            raise FileNotFoundError(
                f"Weights not found at '{MODEL_PATH}'. "
                "Set HF_MODEL_REPO + HF_MODEL_FILE + HF_TOKEN in Space secrets."
            )
        # Imported lazily so the Hub client is only needed when a download
        # is actually required.
        from huggingface_hub import hf_hub_download

        logger.info(f"Pulling weights from Hub: {HF_MODEL_REPO}")
        return hf_hub_download(
            repo_id=HF_MODEL_REPO,
            filename=HF_MODEL_FILE,
            token=os.getenv("HF_TOKEN"),
        )
    return MODEL_PATH
def _load_models():
    """
    Load the TorchScript embedding model on CPU and return it in eval mode.

    The face detector is not loaded here; it is imported lazily inside
    `_retinaface_detect` on the first detection call.
    """
    cpu = torch.device("cpu")
    logger.info("Loading TorchScript embedding model...")
    weights_path = _resolve_model_path()
    embedder = torch.jit.load(weights_path, map_location=cpu)
    embedder.eval()
    logger.info(f"Embedding model loaded from: {weights_path}")
    logger.info("RetinaFace detector ready (loaded on first call)")
    return embedder


# Loaded once at import time and shared by every comparison request.
_COMPARISON_MODEL = _load_models()
# ─────────────────────────────────────────────────────────────────────────────
# Image utilities
# ─────────────────────────────────────────────────────────────────────────────
def _fix_orientation(img: Image.Image) -> Image.Image:
    """
    Apply the EXIF orientation tag (phone captures) and return an RGB image.

    Orientation handling is best-effort: if the EXIF data cannot be processed
    the image is used as-is, but the failure is logged instead of being
    silently discarded.
    """
    try:
        img = ImageOps.exif_transpose(img)
        # Drop the raw EXIF payload so the orientation cannot be applied twice.
        img.info.pop("exif", None)
    except Exception as exc:
        logger.warning("EXIF orientation fix skipped: %s", exc)
    return img.convert("RGB")
| def _decode_to_rgb(img_input) -> Tuple[bool, Optional[np.ndarray]]: | |
| """ | |
| Accepts Gradio numpy (RGB), PIL Image, raw bytes, or base64 string. | |
| Returns (success, RGB uint8 ndarray). | |
| RetinaFace expects RGB; we keep everything in RGB throughout. | |
| """ | |
| try: | |
| if isinstance(img_input, np.ndarray): | |
| if img_input.ndim == 2: # grayscale β RGB | |
| img_input = cv2.cvtColor(img_input, cv2.COLOR_GRAY2RGB) | |
| elif img_input.shape[2] == 4: # RGBA β RGB | |
| img_input = cv2.cvtColor(img_input, cv2.COLOR_RGBA2RGB) | |
| return True, img_input.astype(np.uint8) | |
| if isinstance(img_input, Image.Image): | |
| return True, np.array(_fix_orientation(img_input), dtype=np.uint8) | |
| if isinstance(img_input, bytes): | |
| pil = Image.open(io.BytesIO(img_input)) | |
| return True, np.array(_fix_orientation(pil), dtype=np.uint8) | |
| if isinstance(img_input, str): | |
| return _decode_to_rgb(base64.b64decode(img_input)) | |
| except Exception as e: | |
| logger.error(f"Decode failed: {e}") | |
| return False, None | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Face alignment β similarity transform to ArcFace canonical crop | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _estimate_norm(lmk: np.ndarray, image_size: int = 112) -> np.ndarray: | |
| """ | |
| Estimate the similarity transform (rotation + scale + translation) that | |
| maps detected 5-point landmarks onto the ArcFace canonical template. | |
| Returns a 2Γ3 affine matrix. | |
| """ | |
| assert lmk.shape == (5, 2) | |
| dst = ARCFACE_DST * (image_size / 112.0) | |
| # Use OpenCV estimateAffinePartial2D (similarity: no shear) | |
| M, _ = cv2.estimateAffinePartial2D(lmk, dst, method=cv2.LMEDS) | |
| if M is None: | |
| # Fallback: least-squares full affine | |
| M, _ = cv2.estimateAffinePartial2D(lmk, dst, method=cv2.RANSAC) | |
| return M | |
def _align_face(img_rgb: np.ndarray, landmarks: np.ndarray,
                image_size: int = 112) -> Optional[np.ndarray]:
    """
    Warp the face described by `landmarks` into a square canonical crop.

    Returns the warped image of size (image_size, image_size), or None when
    no transform could be estimated or any step raised (the error is logged).
    """
    try:
        matrix = _estimate_norm(landmarks, image_size)
        if matrix is not None:
            output_size = (image_size, image_size)
            # Pixels that fall outside the source image are filled with 0.
            return cv2.warpAffine(img_rgb, matrix, output_size, borderValue=0)
    except Exception as e:
        logger.error(f"Alignment failed: {e}")
    return None
# ─────────────────────────────────────────────────────────────────────────────
# RetinaFace detection
# ─────────────────────────────────────────────────────────────────────────────
def _orient_pair(first: np.ndarray, second: np.ndarray, down: np.ndarray):
    """Order two points so that (first→second) × down is non-negative."""
    across = second - first
    cross = across[0] * down[1] - across[1] * down[0]
    return (second, first) if cross < 0 else (first, second)


def _retinaface_detect(img_rgb: np.ndarray) -> list:
    """
    Run RetinaFace on an image and return the detected faces, largest first.

    Each face is a dict with keys: bbox (x1, y1, x2, y2), landmarks
    (float32 array of shape (5, 2)), score, and area.

    Landmarks are ordered [eye, eye, nose, mouth corner, mouth corner] with
    the same handedness as the alignment template. Within each pair, the
    vector from the first to the second point crossed with the eyes→mouth
    vector is positive in image coordinates. The order is derived from
    geometry rather than from the detector's "left"/"right" labels, because a
    similarity transform cannot undo a mirrored point order.

    Detections with missing or malformed fields are skipped.
    """
    # Imported lazily so the detector is only loaded on first use.
    from retinaface import RetinaFace

    # NOTE(review): the caller passes RGB; the retina-face package documents
    # pre-loaded arrays as BGR — confirm the expected channel order.
    # detect_faces returns a dict keyed by "face_1", "face_2", ...
    # Each value: {"facial_area": [x1,y1,x2,y2], "landmarks": {...}, "score": float}
    detections = RetinaFace.detect_faces(img_rgb)
    if not isinstance(detections, dict):
        return []

    faces = []
    for val in detections.values():
        try:
            x1, y1, x2, y2 = val["facial_area"]
            score = float(val.get("score", 1.0))
            lm = val["landmarks"]
            eye_a = np.asarray(lm["left_eye"], dtype=np.float32)
            eye_b = np.asarray(lm["right_eye"], dtype=np.float32)
            nose = np.asarray(lm["nose"], dtype=np.float32)
            mouth_a = np.asarray(lm["mouth_left"], dtype=np.float32)
            mouth_b = np.asarray(lm["mouth_right"], dtype=np.float32)

            # NOTE(review): some RetinaFace builds appear to label eyes and
            # mouth corners from the subject's point of view — confirm. The
            # orientation step below makes the result independent of that.
            down = (mouth_a + mouth_b) / 2.0 - (eye_a + eye_b) / 2.0
            eye_a, eye_b = _orient_pair(eye_a, eye_b, down)
            mouth_a, mouth_b = _orient_pair(mouth_a, mouth_b, down)

            pts = np.stack([eye_a, eye_b, nose, mouth_a, mouth_b]).astype(np.float32)
            if pts.shape != (5, 2):
                continue
            faces.append({
                "bbox": (x1, y1, x2, y2),
                "landmarks": pts,
                "score": score,
                "area": (x2 - x1) * (y2 - y1),
            })
        except (KeyError, TypeError, ValueError, IndexError):
            # Malformed detection entry: skip it and keep the rest.
            continue

    # Sort by area descending (largest face first)
    faces.sort(key=lambda f: f["area"], reverse=True)
    return faces
def _detect_and_align(img_rgb: np.ndarray,
                      image_idx: int) -> Tuple[Optional[dict], str]:
    """
    Detect the dominant face in one image and align it, with rotation retry.

    When no face is found in the image as given, detection is retried on the
    image rotated by 90°, 180° and 270°. Detections whose bounding box has a
    non-positive area are ignored, so the size-ratio check below can never
    divide by zero.

    Returns (face_result_dict | None, feedback_message).
    face_result_dict keys: image_tensor (numpy), detection_confidence.
    `image_idx` is only used to label log and feedback messages.
    """
    def _usable_faces(image: np.ndarray) -> list:
        # Discard degenerate boxes; they carry no usable face region.
        return [f for f in _retinaface_detect(image) if f["area"] > 0]

    faces = _usable_faces(img_rgb)

    # Rotation retry if nothing found
    if not faces:
        for angle, code in [(90, cv2.ROTATE_90_CLOCKWISE),
                            (180, cv2.ROTATE_180),
                            (270, cv2.ROTATE_90_COUNTERCLOCKWISE)]:
            rotated = cv2.rotate(img_rgb, code)
            faces = _usable_faces(rotated)
            if faces:
                # Landmarks refer to the rotated image, so align against it.
                img_rgb = rotated
                logger.info(f"Image {image_idx}: detected after {angle}° rotation")
                break

    if not faces:
        return None, (f"No face detected in image {image_idx}. "
                      "Ensure the face is clearly visible, well-lit, and unobstructed.")

    # Two-face handling: keep largest if the second is tiny (background/watermark)
    if len(faces) >= 2:
        ratio = faces[1]["area"] / faces[0]["area"]
        if ratio >= FACE_RATIO_THRESH:
            return None, (f"Two comparable faces found in image {image_idx} "
                          f"(size ratio {ratio:.2f}). Please upload an image "
                          "with a single dominant face.")
        # else: silently drop the smaller face

    face = faces[0]
    crop = _align_face(img_rgb, face["landmarks"])
    if crop is None:
        return None, f"Face alignment failed for image {image_idx}."

    # HWC crop → float32 tensor [1, 3, 112, 112] scaled to [0, 1]
    tensor = (torch.from_numpy(crop.astype(np.float32))
              .permute(2, 0, 1)
              .unsqueeze(0) / 255.0)
    return {
        "image_tensor": tensor.numpy(),
        "detection_confidence": round(face["score"], 3),
    }, "OK"
# ─────────────────────────────────────────────────────────────────────────────
# Embedding + similarity
# ─────────────────────────────────────────────────────────────────────────────
def _augmented_embeddings(tensor: torch.Tensor) -> List[torch.Tensor]:
    """
    Embed three variants of a face tensor: as given, flipped along its last
    axis, and brightened by 1.5× (clamped to [0, 1]).

    Returns the three squeezed model outputs in that order. Gradients are
    disabled for the forward passes.
    """
    mirrored = torch.flip(tensor, dims=[3])
    brightened = torch.clamp(tensor * 1.5, 0, 1)

    embeddings = []
    with torch.no_grad():
        for variant in (tensor, mirrored, brightened):
            embeddings.append(_COMPARISON_MODEL(variant).squeeze())
    return embeddings
| def _avg_cosine(embs1: List[torch.Tensor], | |
| embs2: List[torch.Tensor]) -> float: | |
| sims = [F.cosine_similarity(e1.unsqueeze(0), e2.unsqueeze(0)).item() | |
| for e1 in embs1 for e2 in embs2] | |
| return sum(sims) / len(sims) | |
def _cosine_to_confidence(score: float) -> float:
    """
    Map a cosine score to a confidence percentage.

    A sigmoid centred on THRESHOLD with slope STEEPNESS is scaled to 0–100,
    capped at MAX_CONFIDENCE and rounded to two decimals.
    """
    logit = STEEPNESS * (score - THRESHOLD)
    probability = 1.0 / (1.0 + np.exp(-logit))
    percent = probability * 100.0
    return round(min(percent, MAX_CONFIDENCE), 2)
# ─────────────────────────────────────────────────────────────────────────────
# Full pipeline
# ─────────────────────────────────────────────────────────────────────────────
def _compare(img1, img2) -> dict:
    """
    Run the whole comparison: decode → detect/align → embed → score.

    Returns a dict with keys success, message, score, confidence, match,
    det1 and det2. On failure, `success` is False, `message` explains why,
    and the numeric fields are zero (det1 keeps image 1's detection
    confidence when only image 2 failed).
    """
    def _failure(message: str, det1: float = 0.0) -> dict:
        # Shared shape for every early-exit result.
        return {"success": False, "message": message,
                "score": 0.0, "confidence": 0.0, "match": False,
                "det1": det1, "det2": 0.0}

    # Both images are decoded before either result is inspected.
    ok1, rgb1 = _decode_to_rgb(img1)
    ok2, rgb2 = _decode_to_rgb(img2)
    if not (ok1 and ok2):
        return _failure("Image decoding failed.")

    face1, msg1 = _detect_and_align(rgb1, 1)
    if face1 is None:
        return _failure(msg1)

    face2, msg2 = _detect_and_align(rgb2, 2)
    if face2 is None:
        return _failure(msg2, det1=face1["detection_confidence"])

    t1 = torch.tensor(face1["image_tensor"], dtype=torch.float32)
    t2 = torch.tensor(face2["image_tensor"], dtype=torch.float32)
    score = _avg_cosine(_augmented_embeddings(t1), _augmented_embeddings(t2))
    is_match = score >= THRESHOLD

    return {
        "success": True,
        "match": is_match,
        "score": round(score, 4),
        "confidence": _cosine_to_confidence(score),
        "message": "Faces matched" if is_match else "Faces do not match",
        "det1": face1["detection_confidence"],
        "det2": face2["detection_confidence"],
    }
# ─────────────────────────────────────────────────────────────────────────────
# Gradio inference wrapper
# ─────────────────────────────────────────────────────────────────────────────
def run_comparison(img1: np.ndarray, img2: np.ndarray):
    """
    Gradio click handler: compare two uploaded images.

    Returns a 4-tuple (verdict HTML, score text, confidence text, details
    HTML) matching the output components wired to the Compare button.
    """
    # NOTE(review): the placeholder glyph below looks mis-encoded (probably a
    # dash in the original source) — confirm before changing it.
    placeholder = "β"

    if img1 is None or img2 is None:
        prompt = _verdict_html(False, None, "Upload both images to run comparison.")
        return prompt, placeholder, placeholder, ""

    r = _compare(img1, img2)
    if r["success"]:
        return (
            _verdict_html(True, r["match"], r["message"]),
            f"{r['score']:.4f}",
            f"{r['confidence']}%",
            _details_html(r),
        )
    return _verdict_html(False, None, r["message"]), placeholder, placeholder, _details_html(r)
| def _verdict_html(success: bool, match: Optional[bool], message: str) -> str: | |
| if not success: | |
| color, icon, label = "#c0392b", "β", "Error" | |
| elif match: | |
| color, icon, label = "#16a085", "β", "Match" | |
| else: | |
| color, icon, label = "#c0392b", "β", "No Match" | |
| bg = "#eafaf7" if (success and match) else "#fdf2f2" | |
| return f""" | |
| <div style="display:flex; align-items:center; gap:14px; padding:18px 24px; | |
| border-radius:10px; border-left:4px solid {color}; background:{bg}; | |
| font-family:'DM Sans',sans-serif;"> | |
| <span style="font-size:28px; color:{color}; font-weight:700;">{icon}</span> | |
| <div> | |
| <div style="font-size:19px; font-weight:700; color:{color};">{label}</div> | |
| <div style="font-size:13px; color:#555; margin-top:2px;">{message}</div> | |
| </div> | |
| </div>""" | |
| def _details_html(r: dict) -> str: | |
| if not r.get("success"): | |
| return (f'<div style="font-family:monospace; font-size:12px; color:#888;' | |
| f'padding:10px 14px; background:#f9f9f9; border-radius:6px;">' | |
| f'{r["message"]}</div>') | |
| bar_pct = min(int(r["confidence"]), 100) | |
| bar_color = "#16a085" if r["match"] else "#c0392b" | |
| return f""" | |
| <div style="font-family:'DM Sans',sans-serif; font-size:13px; color:#333;"> | |
| <div style="display:flex; gap:32px; margin-bottom:14px;"> | |
| <div> | |
| <div style="font-size:11px; color:#888; text-transform:uppercase; letter-spacing:.06em;">Similarity score</div> | |
| <div style="font-size:22px; font-weight:700; color:#111;">{r['score']}</div> | |
| </div> | |
| <div> | |
| <div style="font-size:11px; color:#888; text-transform:uppercase; letter-spacing:.06em;">Threshold</div> | |
| <div style="font-size:22px; font-weight:700; color:#111;">{THRESHOLD}</div> | |
| </div> | |
| <div> | |
| <div style="font-size:11px; color:#888; text-transform:uppercase; letter-spacing:.06em;">Method</div> | |
| <div style="font-size:13px; font-weight:500; color:#555; padding-top:5px;">Augmented cosine<br>(3Γ3 pairs)</div> | |
| </div> | |
| </div> | |
| <div style="margin-bottom:6px;"> | |
| <div style="display:flex; justify-content:space-between; margin-bottom:4px;"> | |
| <span style="font-size:11px; color:#888; text-transform:uppercase; letter-spacing:.06em;">Confidence</span> | |
| <span style="font-size:13px; font-weight:700; color:{bar_color};">{r['confidence']}%</span> | |
| </div> | |
| <div style="height:6px; background:#e8e8e8; border-radius:4px; overflow:hidden;"> | |
| <div style="height:100%; width:{bar_pct}%; background:{bar_color}; | |
| border-radius:4px; transition:width .4s;"></div> | |
| </div> | |
| </div> | |
| <div style="display:flex; gap:24px; margin-top:14px; padding-top:14px; | |
| border-top:1px solid #ececec;"> | |
| <div style="font-size:11px; color:#888;"> | |
| Detection confidence β image 1: <strong style="color:#333;">{r['det1']}</strong> | |
| </div> | |
| <div style="font-size:11px; color:#888;"> | |
| Detection confidence β image 2: <strong style="color:#333;">{r['det2']}</strong> | |
| </div> | |
| </div> | |
| </div>""" | |
# ─────────────────────────────────────────────────────────────────────────────
# CSS
# ─────────────────────────────────────────────────────────────────────────────
# Stylesheet handed to gr.Blocks(css=...). It loads the DM Sans / DM Mono web
# fonts, styles the custom classes used by the UI (top-bar, upload-panel,
# upload-label, results-label, run-btn, clear-btn), and hides the Gradio
# footer, the `.svelte-1gfkn6j` element and number inputs.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;700&family=DM+Mono:wght@400;500&display=swap');
body, .gradio-container {
font-family: 'DM Sans', sans-serif !important;
background: #f7f7f5 !important;
}
.top-bar {
background: #0d0d0d; color: #fff;
padding: 18px 28px 14px; border-radius: 12px; margin-bottom: 4px;
}
.top-bar h1 { font-size: 22px; font-weight: 700; margin: 0 0 4px; letter-spacing: -0.02em; }
.top-bar .badges { display: flex; gap: 8px; margin-top: 10px; flex-wrap: wrap; }
.top-bar .badge {
font-family: 'DM Mono', monospace; font-size: 10px;
padding: 3px 9px; border: 1px solid #333; border-radius: 20px; color: #aaa;
}
.upload-panel { background: #fff; border: 1px solid #e5e5e5; border-radius: 12px; overflow: hidden; }
.upload-label {
font-size: 11px; font-weight: 700; letter-spacing: .08em;
text-transform: uppercase; color: #888; padding: 10px 14px 0;
font-family: 'DM Mono', monospace;
}
.results-label {
font-size: 11px; font-weight: 700; text-transform: uppercase;
letter-spacing: .08em; color: #bbb; margin-bottom: 10px;
font-family: 'DM Mono', monospace;
}
.run-btn {
background: #0d0d0d !important; color: #fff !important;
border: none !important; border-radius: 8px !important;
font-family: 'DM Sans', sans-serif !important; font-weight: 700 !important;
font-size: 14px !important; padding: 12px 0 !important;
width: 100% !important; cursor: pointer !important; letter-spacing: 0.01em !important;
}
.run-btn:hover { background: #1a1a1a !important; }
.clear-btn {
background: transparent !important; color: #888 !important;
border: 1px solid #ddd !important; border-radius: 8px !important;
font-family: 'DM Mono', monospace !important; font-size: 12px !important;
}
footer { display: none !important; }
.svelte-1gfkn6j { display: none !important; }
input[type=number] { display: none; }
label span { font-family: 'DM Mono', monospace; font-size: 11px !important; color: #888 !important; }
"""
# ─────────────────────────────────────────────────────────────────────────────
# Gradio UI
# ─────────────────────────────────────────────────────────────────────────────
def build_ui():
    """
    Assemble the Gradio Blocks app and return it (not yet launched).

    Layout: a header banner, a two-column row (uploads + buttons + usage note
    on the left, verdict / score / confidence / details on the right), and a
    collapsed "How it works" accordion. The Compare button calls
    `run_comparison`; the Clear button resets inputs and outputs.

    NOTE(review): container nesting was reconstructed from component order
    because the source lost its indentation — confirm against the deployed
    layout.
    """
    # Static HTML fragments, kept apart from the layout code below.
    header_html = """
<div class="top-bar">
<h1>Facial Comparison</h1>
<p style="color:#ccc; font-size:13px; font-family:'DM Sans',sans-serif;
font-weight:400; margin-bottom:10px; line-height:1.6;">
Verify whether two faces belong to the same person β works on portraits, selfies,
and identity documents (CNIC, passport). The system automatically extracts the face
from an ID card and compares it against a live photo.
Deployed across <strong style="color:#fff;">40+ financial institutions</strong> for
customer onboarding and fraud prevention.
</p>
<div class="badges">
<span class="badge">face matching</span>
<span class="badge">ID card face extraction</span>
<span class="badge">liveness-aware</span>
<span class="badge">occlusion handling</span>
<span class="badge">production-grade</span>
</div>
<div style="margin-top:12px; display:flex; align-items:center; gap:6px;
font-family:'DM Mono',monospace; font-size:10px; color:#555;">
<span style="display:inline-block; width:7px; height:7px; border-radius:50%;
background:#22c55e; flex-shrink:0;"></span>
No images are stored, logged, or transmitted beyond this session.
Your data never leaves inference memory.
</div>
</div>
"""
    usage_html = """
<div style="margin-top:10px; padding:12px 16px; background:#fff;
border:1px solid #ececec; border-radius:10px;
font-family:'DM Mono',monospace; font-size:11px;
color:#aaa; line-height:1.8;">
<strong style="color:#555;">Supported inputs</strong><br>
Portrait photo Β· Selfie Β· ID card (face auto-extracted) Β· Passport photo page<br><br>
<strong style="color:#555;">How to use</strong><br>
Upload any two images β the system locates and extracts the face from each,
then computes a match score and confidence percentage.
</div>
"""
    how_it_works_html = """
<div style="font-family:'DM Sans',sans-serif; font-size:13px;
color:#555; line-height:1.8; padding:4px 0;">
<strong>1. Face extraction</strong> β The system automatically locates
every face in the uploaded image, including faces embedded in identity
documents like CNICs and passports. No manual cropping required.<br><br>
<strong>2. Alignment</strong> β Each detected face is geometrically
normalised to a canonical frontal pose using facial landmark positions,
making the comparison robust to head tilt, lighting, and image angle.<br><br>
<strong>3. Feature encoding</strong> β The aligned face is passed through
a deep neural network (custom-trained) that compresses it into a compact
numerical representation capturing unique facial geometry.<br><br>
<strong>4. Robust matching</strong> β Multiple augmented versions of each
face are compared, and the results are averaged to produce a stable
similarity score resilient to minor image quality variations.<br><br>
<strong>5. Confidence scoring</strong> β The similarity score is converted
into an intuitive 0β99.9% confidence value along with a clear
Match / No Match verdict.<br><br>
<strong style="color:#16a085;">Privacy</strong> β All processing happens
entirely within the inference session. No image, face crop, score, or
metadata is written to disk, logged, or sent to any external service.
Once your session ends, nothing is retained.
</div>
"""
    # Empty-state markup, shown initially and restored by the Clear button.
    verdict_placeholder = (
        '<div style="height:72px; background:#f7f7f5; border-radius:10px;'
        'border:1px dashed #ddd; display:flex; align-items:center;'
        'justify-content:center; color:#ccc; font-size:13px;'
        'font-family:DM Mono,monospace;">awaiting input</div>'
    )
    details_placeholder = (
        '<div style="height:80px; background:#f7f7f5; border-radius:8px;'
        'border:1px dashed #ddd;"></div>'
    )

    def _reset():
        # One value per component listed in the Clear button's outputs.
        return (None, None, verdict_placeholder, "β", "β", details_placeholder)

    with gr.Blocks(css=CSS, title="Facial Comparison") as demo:
        gr.HTML(header_html)

        with gr.Row(equal_height=True):
            # Left column: the two uploads, action buttons and usage note.
            with gr.Column(scale=5):
                with gr.Row(equal_height=True):
                    with gr.Column():
                        gr.HTML('<div class="upload-label">Image 1</div>')
                        img1 = gr.Image(label="", type="numpy",
                                        sources=["upload", "clipboard"],
                                        height=260, elem_classes=["upload-panel"])
                    with gr.Column():
                        gr.HTML('<div class="upload-label">Image 2</div>')
                        img2 = gr.Image(label="", type="numpy",
                                        sources=["upload", "clipboard"],
                                        height=260, elem_classes=["upload-panel"])
                with gr.Row():
                    clear_btn = gr.Button("Clear", elem_classes=["clear-btn"])
                    run_btn = gr.Button("Compare β", elem_classes=["run-btn"])
                gr.HTML(usage_html)

            # Right column: verdict banner, score/confidence labels, details.
            with gr.Column(scale=4):
                gr.HTML('<div class="results-label" style="margin-bottom:6px;">Result</div>')
                verdict_html = gr.HTML(value=verdict_placeholder)
                with gr.Row():
                    score_out = gr.Label(label="Similarity score")
                    conf_out = gr.Label(label="Confidence")
                gr.HTML('<div class="results-label" style="margin:10px 0 6px;">Details</div>')
                details_html = gr.HTML(value=details_placeholder)

        with gr.Accordion("How it works", open=False):
            gr.HTML(how_it_works_html)

        result_outputs = [verdict_html, score_out, conf_out, details_html]
        run_btn.click(
            fn=run_comparison,
            inputs=[img1, img2],
            outputs=result_outputs,
        )
        clear_btn.click(
            fn=_reset,
            outputs=[img1, img2] + result_outputs,
        )
    return demo
if __name__ == "__main__":
    # Serve on all interfaces at port 7860 and surface handler errors in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    ui = build_ui()
    ui.launch(**launch_options)