Spaces:
Sleeping
Sleeping
Upload 14 files
Browse files- app.py +127 -0
- custom_tools/.DS_Store +0 -0
- custom_tools/__init__.py +13 -0
- custom_tools/__pycache__/__init__.cpython-311.pyc +0 -0
- custom_tools/__pycache__/facesimilarity.cpython-311.pyc +0 -0
- custom_tools/__pycache__/facesimilarity_tool.cpython-311.pyc +0 -0
- custom_tools/__pycache__/image_description_tool.cpython-311.pyc +0 -0
- custom_tools/__pycache__/liveness_detection_tool.cpython-311.pyc +0 -0
- custom_tools/__pycache__/video_deepfake_tool.cpython-311.pyc +0 -0
- custom_tools/facesimilarity_tool.py +68 -0
- custom_tools/image_description_tool.py +46 -0
- custom_tools/liveness_detection_tool.py +41 -0
- custom_tools/video_deepfake_tool.py +78 -0
- requirements.txt +7 -0
app.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import cv2
|
| 4 |
+
import tempfile
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import numpy as np
|
| 7 |
+
import time
|
| 8 |
+
from custom_tools.video_deepfake_tool import VideoDeepfakeTool
|
| 9 |
+
from custom_tools.liveness_detection_tool import LivenessDetectionTool
|
| 10 |
+
from custom_tools.image_description_tool import ImageDescriptionTool
|
| 11 |
+
from custom_tools.facesimilarity_tool import FaceSimilarityTool
|
| 12 |
+
|
| 13 |
+
# Instantiate each verification tool once at import time so the Gradio
# request handlers can reuse the already-loaded models across calls.
video_deepfake_tool = VideoDeepfakeTool()
liveness_detection_tool = LivenessDetectionTool()
image_description_tool = ImageDescriptionTool()
face_similarity_tool = FaceSimilarityTool()  # Using InsightFace-based implementation
|
| 18 |
+
|
| 19 |
+
# Extract frames function
def extract_frames_from_video(video_path, num_frames=3):
    """Extract up to ``num_frames`` evenly spaced frames from a video.

    Each extracted frame is written to its own temporary JPEG file.

    Args:
        video_path: Path to a video file readable by OpenCV.
        num_frames: Number of frames to sample. Sampling points are spread
            evenly through the clip, skipping the very start and end.

    Returns:
        List of temp-file paths for the frames that were successfully read.
        Empty list if the video cannot be opened, reports no frames, or
        ``num_frames`` is not positive. Callers own (and should delete)
        the temporary files.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return []

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against empty/corrupt videos and nonsensical requests;
        # the original code would silently loop over bogus indices.
        if total_frames <= 0 or num_frames <= 0:
            return []

        # Sample at i/(num_frames+1) of the way through so the endpoints
        # (often black or transition frames) are excluded.
        frames_to_extract = [
            int(total_frames * i / (num_frames + 1))
            for i in range(1, num_frames + 1)
        ]

        extracted_frames = []
        for frame_idx in frames_to_extract:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if ret:
                # NamedTemporaryFile only reserves the path; cv2.imwrite
                # writes the actual JPEG bytes afterwards.
                with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
                    frame_path = temp_file.name
                cv2.imwrite(frame_path, frame)
                extracted_frames.append(frame_path)
        return extracted_frames
    finally:
        # Release the capture handle even if decoding raises mid-loop
        # (the original leaked it on any exception).
        cap.release()
|
| 42 |
+
|
| 43 |
+
def process_kyc_verification(video_file, id_card_image):
    """Run the full KYC pipeline over an uploaded selfie video and ID card.

    Steps: ID-card captioning, frame extraction, video deepfake analysis,
    liveness detection, face matching, and a final pass/fail summary.

    Args:
        video_file: Uploaded video — a Gradio file-like object or a path.
        id_card_image: Uploaded ID image — file-like object or path.

    Returns:
        Tuple of (markdown report, annotated-video path or None,
        representative frame path or None), matching the three Gradio
        output components.
    """
    results = []
    frames = []

    # Step 1: Resolve uploads to filesystem paths (Gradio may hand us
    # either a tempfile-like object or a plain string path).
    video_path = video_file.name if hasattr(video_file, 'name') else video_file
    id_path = id_card_image.name if hasattr(id_card_image, 'name') else id_card_image

    # Step 2: Document verification
    results.append("## 🔍 Analyzing ID Card")
    id_description = image_description_tool.apply(id_path)
    results.append(f"**ID Card Analysis**: {id_description}")

    # Step 3: Extract frames for analysis
    results.append("\n## 🎥 Processing Video")
    extracted_frames = extract_frames_from_video(video_path, 3)
    frames = extracted_frames  # Kept for the "Analyzed Frame" output

    if not extracted_frames:
        results.append("❌ Failed to extract frames from video")
        return "\n".join(results), None, None

    # Step 4: Deepfake detection on video
    results.append("\n## 🎬 Video Deepfake Analysis")
    video_deepfake_result = video_deepfake_tool.apply(video_path)
    # The deepfake tool returns an annotated-video *path* on success but an
    # error *message* on failure; only hand a real file to the gr.Video
    # output, otherwise Gradio chokes on a nonexistent path.
    if isinstance(video_deepfake_result, str) and os.path.exists(video_deepfake_result):
        annotated_video = video_deepfake_result
        results.append(f"**Video Deepfake Check**: Processing complete - output saved to {video_deepfake_result}")
    else:
        annotated_video = None
        results.append(f"**Video Deepfake Check**: {video_deepfake_result}")

    # Step 5: Liveness detection (first extracted frame only)
    results.append("\n## 👤 Liveness Detection")
    liveness_result = liveness_detection_tool.apply(extracted_frames[0])
    results.append(f"**Liveness Check**: {liveness_result}")

    # Step 6: Face similarity check - now always available with InsightFace
    results.append("\n## 👥 Face Matching")
    face_sim_result = face_similarity_tool.apply(extracted_frames[0], id_path)
    results.append(f"**Face Similarity**: {face_sim_result}")

    # Step 7: Final verification
    results.append("\n## 📊 Verification Summary")

    # Decisions are keyword matches against the tools' formatted strings —
    # brittle, but matches how the tools report their results.
    liveness_passed = "real" in liveness_result.lower()
    face_match_passed = "match" in face_sim_result.lower() or "true" in face_sim_result.lower()

    # Final verification decision with clear recommendation
    if liveness_passed and face_match_passed:
        results.append("✅ **KYC VERIFICATION SUCCESSFUL**")
        results.append("All verification checks have passed. **You may proceed with the KYC verification process.**")
    else:
        results.append("❌ **KYC VERIFICATION FAILED**")
        if not liveness_passed:
            results.append("- Liveness check failed: The person in the video may not be real")
        if not face_match_passed:
            results.append("- Face matching failed: The person in the video doesn't match the ID card")
        results.append("\n**RECOMMENDATION: DO NOT PROCEED WITH THIS KYC APPLICATION**")
        results.append("The verification has detected potential fraud or identity mismatch issues.")

    return "\n".join(results), annotated_video, frames[0] if frames else None
|
| 102 |
+
|
| 103 |
+
# Build the Gradio Blocks interface: uploads on the left, verification
# report plus annotated media on the right.
with gr.Blocks(title="Advanced KYC Verification System") as demo:
    gr.Markdown("# 🔐 Video KYC Verification System")
    gr.Markdown("Upload a 10-second video of yourself and your ID card photo for verification.")

    with gr.Row():
        # Input column: selfie video, ID photo, and the trigger button.
        with gr.Column():
            video_input = gr.Video(label="Upload 10-second video")
            id_card = gr.Image(label="Upload ID Card Photo", type="filepath")
            submit_btn = gr.Button("Start KYC Verification", variant="primary")

        # Output column: markdown report, annotated video, analyzed frame.
        with gr.Column():
            output_text = gr.Markdown(label="Verification Results")
            output_video = gr.Video(label="Processed Video")
            output_frame = gr.Image(label="Analyzed Frame")

    # Wire the button to the verification pipeline; outputs map 1:1 to the
    # tuple returned by process_kyc_verification.
    submit_btn.click(
        fn=process_kyc_verification,
        inputs=[video_input, id_card],
        outputs=[output_text, output_video, output_frame],
    )

# Launch the app locally for testing
if __name__ == "__main__":
    demo.launch()
|
custom_tools/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
custom_tools/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# custom_tools/__init__.py
"""Convenience re-exports for the custom KYC verification tools."""

from .video_deepfake_tool import VideoDeepfakeTool
from .liveness_detection_tool import LivenessDetectionTool
from .facesimilarity_tool import FaceSimilarityTool
from .image_description_tool import ImageDescriptionTool

# Public API of the package.
__all__ = [
    "VideoDeepfakeTool",
    "LivenessDetectionTool",
    "FaceSimilarityTool",
    "ImageDescriptionTool",
]
|
custom_tools/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (553 Bytes). View file
|
|
|
custom_tools/__pycache__/facesimilarity.cpython-311.pyc
ADDED
|
Binary file (2.19 kB). View file
|
|
|
custom_tools/__pycache__/facesimilarity_tool.cpython-311.pyc
ADDED
|
Binary file (3.73 kB). View file
|
|
|
custom_tools/__pycache__/image_description_tool.cpython-311.pyc
ADDED
|
Binary file (3.41 kB). View file
|
|
|
custom_tools/__pycache__/liveness_detection_tool.cpython-311.pyc
ADDED
|
Binary file (3.11 kB). View file
|
|
|
custom_tools/__pycache__/video_deepfake_tool.cpython-311.pyc
ADDED
|
Binary file (5.47 kB). View file
|
|
|
custom_tools/facesimilarity_tool.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
import os
|
| 3 |
+
import cv2
|
| 4 |
+
import numpy as np
|
| 5 |
+
from insightface.app import FaceAnalysis
|
| 6 |
+
|
| 7 |
+
class FaceSimilarityTool(BaseTool):
    """Compare the primary faces of two images using InsightFace embeddings."""

    default_desc = 'Uses InsightFace to evaluate face similarity between two images.'

    def __init__(self, threshold=0.5, use_gpu=False, model_name='buffalo_s'):
        """Initialize the InsightFace analysis pipeline.

        Args:
            threshold: Cosine-similarity cutoff above which two faces are
                considered a match.
            use_gpu: Run inference on GPU (ctx_id=0) instead of CPU (ctx_id=-1).
            model_name: InsightFace model-pack name (downloaded/cached on
                first use) or a path to a local model directory.
        """
        super().__init__()
        self.threshold = threshold

        # Use GPU if available (ctx_id=0) or CPU (ctx_id=-1)
        ctx_id = 0 if use_gpu else -1

        # FIX: previously hard-coded to a developer-machine absolute path
        # (/Users/.../buffalo_s), which broke on every other machine. A pack
        # *name* lets InsightFace fetch and cache the model anywhere.
        print("Initializing InsightFace model...")
        self.app = FaceAnalysis(name=model_name)
        self.app.prepare(ctx_id=ctx_id, det_size=(640, 640))
        print("✅ InsightFace model initialized successfully")

    def apply(self, img1_path: str, img2_path: str) -> str:
        """Return a formatted match/no-match verdict for the two images.

        Returns a human-readable result string; errors are reported as
        strings rather than raised, matching the other tools.
        """
        try:
            # Validate file existence before handing paths to OpenCV.
            if not os.path.exists(img1_path):
                return f"Error: Image 1 not found at path: {img1_path}"
            if not os.path.exists(img2_path):
                return f"Error: Image 2 not found at path: {img2_path}"

            img1 = cv2.imread(img1_path)
            img2 = cv2.imread(img2_path)
            if img1 is None or img2 is None:
                # (was a placeholder-less f-string)
                return "Error: Failed to load one or both images"

            # Detect faces and compute embeddings in both images.
            faces1 = self.app.get(img1)
            faces2 = self.app.get(img2)

            if len(faces1) == 0:
                return "Face similarity result: **No match** (No faces detected in first image)"
            if len(faces2) == 0:
                return "Face similarity result: **No match** (No faces detected in second image)"

            # Use the first (primary) detected face from each image.
            embedding1 = faces1[0].embedding
            embedding2 = faces2[0].embedding

            # Cosine similarity between the two embedding vectors.
            similarity = np.dot(embedding1, embedding2) / (
                np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
            )

            if similarity > self.threshold:
                result = "Match detected"
                match_text = "True"
            else:
                result = "No match"
                match_text = "False"

            return f"Face similarity result: **{match_text}** ({result}, similarity score: {similarity:.4f})"

        except Exception as e:
            return f"Error during face similarity computation: {str(e)}"
|
custom_tools/image_description_tool.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
class ImageDescriptionTool(BaseTool):
    """Generates a one-line caption for an image with a ViT-GPT2 model."""

    default_desc = 'Uses a pretrained VIT-GPT2 model to generate descriptions for images.'

    def __init__(self):
        """Load the captioning model, processor, and tokenizer once."""
        super().__init__()
        # Import lazily so merely importing this module stays cheap.
        from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

        checkpoint = "nlpconnect/vit-gpt2-image-captioning"
        self.model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
        self.feature_extractor = ViTImageProcessor.from_pretrained(checkpoint)
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        # Pick the best available device and fix generation settings.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.max_length = 16
        self.num_beams = 4
        self.gen_kwargs = {"max_length": self.max_length, "num_beams": self.num_beams}

    def apply(self, image_path: str) -> str:
        """Return a formatted caption for the image at ``image_path``.

        Errors are reported as a string instead of being raised.
        """
        try:
            # Load and normalize the image to RGB for the ViT processor.
            img = Image.open(image_path)
            if img.mode != "RGB":
                img = img.convert(mode="RGB")

            # Preprocess into model-ready pixel tensors on the right device.
            pixels = self.feature_extractor(images=[img], return_tensors="pt").pixel_values
            pixels = pixels.to(self.device)

            # Beam-search caption generation; no gradients needed.
            with torch.no_grad():
                token_ids = self.model.generate(pixels, **self.gen_kwargs)

            caption = self.tokenizer.decode(token_ids[0], skip_special_tokens=True).strip()

            return f"Description: **{caption}** (generated with VIT-GPT2 model)"

        except Exception as e:
            return f"Error during image description: {str(e)}"
|
custom_tools/liveness_detection_tool.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import torch
|
| 4 |
+
import tempfile
|
| 5 |
+
import cv2
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
class LivenessDetectionTool(BaseTool):
    """Classifies a face image as live or spoofed with a DinoV2 classifier."""

    default_desc = 'Detects liveness in an image using a DinoV2 image classification model.'

    def __init__(self):
        """Load the liveness-detection processor and model once."""
        super().__init__()
        # Import lazily so importing this module does not pull in transformers.
        from transformers import AutoImageProcessor, AutoModelForImageClassification

        checkpoint = "nguyenkhoa/dinov2_Liveness_detection_v2.2.3"
        self.processor = AutoImageProcessor.from_pretrained(checkpoint)
        self.model = AutoModelForImageClassification.from_pretrained(checkpoint)

    def apply(self, image_path: str) -> str:
        """Return a formatted liveness verdict for the image at ``image_path``.

        Errors are reported as a string instead of being raised.
        """
        try:
            img = Image.open(image_path).convert("RGB")

            # Run a single forward pass; no gradients needed for inference.
            model_inputs = self.processor(images=img, return_tensors="pt")
            with torch.no_grad():
                model_outputs = self.model(**model_inputs)

            # Convert logits to class probabilities for the single image.
            probabilities = torch.nn.functional.softmax(model_outputs.logits, dim=-1)[0]

            # Take the top class and report its label with confidence.
            top_idx = torch.argmax(probabilities).item()
            label = self.model.config.id2label[top_idx]
            score = round(probabilities[top_idx].item(), 4)

            return f"Liveness: {label} (Confidence: {score})"

        except Exception as e:
            return f"Error during liveness detection: {str(e)}"
|
custom_tools/video_deepfake_tool.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import cv2
|
| 4 |
+
import torch
|
| 5 |
+
import tempfile
|
| 6 |
+
|
| 7 |
+
class VideoDeepfakeTool(BaseTool):
    """Annotates each detected face in a video with a real/fake prediction."""

    default_desc = 'Detects deepfakes in a video using a Hugging Face image classifier.'

    def __init__(self):
        """Load the image classifier and the Haar face detector once."""
        super().__init__()
        # Import lazily so importing this module stays cheap.
        from transformers import AutoImageProcessor, AutoModelForImageClassification

        # Load Hugging Face model and processor
        self.processor = AutoImageProcessor.from_pretrained("Smogy/SMOGY-Ai-images-detector")
        self.model = AutoModelForImageClassification.from_pretrained("Smogy/SMOGY-Ai-images-detector")

        # Load Haar cascade for face detection
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    def apply(self, video_path: str) -> str:
        """Process a video frame-by-frame, drawing a labeled box per face.

        Returns:
            Path to the annotated output video on success, or an error
            message string on failure (callers must check which they got).
        """
        cap = None
        out = None
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError("Could not open video file")

            # Mirror the input's geometry/frame rate in the output writer.
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # NamedTemporaryFile only reserves the path; VideoWriter fills it.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
                output_path = temp_file.name

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            # Process each frame
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Haar cascades need grayscale input for detection.
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)

                # Classify and annotate every detected face region.
                for (x, y, w, h) in faces:
                    face_img = frame[y:y+h, x:x+w]
                    face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
                    inputs = self.processor(images=face_pil, return_tensors="pt")

                    with torch.no_grad():
                        outputs = self.model(**inputs)

                    probs = torch.softmax(outputs.logits, dim=1)
                    idx = probs.argmax().item()
                    label = self.model.config.id2label[idx]
                    conf = probs[0, idx].item()

                    # Green box for 'real', red otherwise.
                    # NOTE(review): assumes the model's id2label uses the
                    # lowercase string 'real' — confirm against the model card.
                    color = (0, 255, 0) if label == 'real' else (0, 0, 255)
                    cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
                    cv2.putText(frame, f"{label} {conf:.2f}", (x, y-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

                out.write(frame)

            return output_path

        except Exception as e:
            return f"Error during video deepfake detection: {str(e)}"
        finally:
            # FIX: the original only released cap/out on the success path,
            # leaking both handles whenever an exception fired mid-loop.
            if cap is not None:
                cap.release()
            if out is not None:
                out.release()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
torch>=1.10.0
|
| 3 |
+
transformers>=4.30.0
|
| 4 |
+
Pillow>=9.0.0
|
| 5 |
+
opencv-python>=4.5.0
|
| 6 |
+
numpy>=1.20.0
|
| 7 |
+
gradio-client
agentlego
insightface>=0.7
onnxruntime
|