Spaces:
Sleeping
Sleeping
Upload 14 files
Browse files- app.py +127 -0
- custom_tools/.DS_Store +0 -0
- custom_tools/__init__.py +13 -0
- custom_tools/__pycache__/__init__.cpython-311.pyc +0 -0
- custom_tools/__pycache__/facesimilarity.cpython-311.pyc +0 -0
- custom_tools/__pycache__/facesimilarity_tool.cpython-311.pyc +0 -0
- custom_tools/__pycache__/image_description_tool.cpython-311.pyc +0 -0
- custom_tools/__pycache__/liveness_detection_tool.cpython-311.pyc +0 -0
- custom_tools/__pycache__/video_deepfake_tool.cpython-311.pyc +0 -0
- custom_tools/facesimilarity_tool.py +68 -0
- custom_tools/image_description_tool.py +46 -0
- custom_tools/liveness_detection_tool.py +41 -0
- custom_tools/video_deepfake_tool.py +78 -0
- requirements.txt +7 -0
app.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import cv2
|
| 4 |
+
import tempfile
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import numpy as np
|
| 7 |
+
import time
|
| 8 |
+
from custom_tools.video_deepfake_tool import VideoDeepfakeTool
|
| 9 |
+
from custom_tools.liveness_detection_tool import LivenessDetectionTool
|
| 10 |
+
from custom_tools.image_description_tool import ImageDescriptionTool
|
| 11 |
+
from custom_tools.facesimilarity_tool import FaceSimilarityTool
|
| 12 |
+
|
| 13 |
+
# Instantiate each verification tool once at import time so the Gradio
# request handlers can reuse the already-loaded models across calls.
video_deepfake_tool = VideoDeepfakeTool()
liveness_detection_tool = LivenessDetectionTool()
image_description_tool = ImageDescriptionTool()
face_similarity_tool = FaceSimilarityTool()  # Using InsightFace-based implementation
|
| 18 |
+
|
| 19 |
+
# Extract frames function
def extract_frames_from_video(video_path, num_frames=3):
    """Extract up to ``num_frames`` evenly spaced frames from a video.

    Each extracted frame is written to its own temporary JPEG file.

    Args:
        video_path: Path to a video file readable by OpenCV.
        num_frames: Number of frames to sample. Sampling points are spread
            evenly through the clip, skipping the very start and end.

    Returns:
        List of temp-file paths for the frames that were successfully read.
        Empty list if the video cannot be opened, reports no frames, or
        ``num_frames`` is not positive. Callers own (and should delete)
        the temporary files.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return []

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against empty/corrupt videos and nonsensical requests;
        # the original code would silently loop over bogus indices.
        if total_frames <= 0 or num_frames <= 0:
            return []

        # Sample at i/(num_frames+1) of the way through so the endpoints
        # (often black or transition frames) are excluded.
        frames_to_extract = [
            int(total_frames * i / (num_frames + 1))
            for i in range(1, num_frames + 1)
        ]

        extracted_frames = []
        for frame_idx in frames_to_extract:
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if ret:
                # NamedTemporaryFile only reserves the path; cv2.imwrite
                # writes the actual JPEG bytes afterwards.
                with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as temp_file:
                    frame_path = temp_file.name
                cv2.imwrite(frame_path, frame)
                extracted_frames.append(frame_path)
        return extracted_frames
    finally:
        # Release the capture handle even if decoding raises mid-loop
        # (the original leaked it on any exception).
        cap.release()
|
| 42 |
+
|
| 43 |
+
def process_kyc_verification(video_file, id_card_image):
    """Run the full KYC pipeline over an uploaded selfie video and ID card.

    Steps: ID-card captioning, frame extraction, video deepfake analysis,
    liveness detection, face matching, and a final pass/fail summary.

    Args:
        video_file: Uploaded video — a Gradio file-like object or a path.
        id_card_image: Uploaded ID image — file-like object or path.

    Returns:
        Tuple of (markdown report, annotated-video path or None,
        representative frame path or None), matching the three Gradio
        output components.
    """
    results = []
    frames = []

    # Step 1: Resolve uploads to filesystem paths (Gradio may hand us
    # either a tempfile-like object or a plain string path).
    video_path = video_file.name if hasattr(video_file, 'name') else video_file
    id_path = id_card_image.name if hasattr(id_card_image, 'name') else id_card_image

    # Step 2: Document verification
    results.append("## 🔍 Analyzing ID Card")
    id_description = image_description_tool.apply(id_path)
    results.append(f"**ID Card Analysis**: {id_description}")

    # Step 3: Extract frames for analysis
    results.append("\n## 🎥 Processing Video")
    extracted_frames = extract_frames_from_video(video_path, 3)
    frames = extracted_frames  # Kept for the "Analyzed Frame" output

    if not extracted_frames:
        results.append("❌ Failed to extract frames from video")
        return "\n".join(results), None, None

    # Step 4: Deepfake detection on video
    results.append("\n## 🎬 Video Deepfake Analysis")
    video_deepfake_result = video_deepfake_tool.apply(video_path)
    # The deepfake tool returns an annotated-video *path* on success but an
    # error *message* on failure; only hand a real file to the gr.Video
    # output, otherwise Gradio chokes on a nonexistent path.
    if isinstance(video_deepfake_result, str) and os.path.exists(video_deepfake_result):
        annotated_video = video_deepfake_result
        results.append(f"**Video Deepfake Check**: Processing complete - output saved to {video_deepfake_result}")
    else:
        annotated_video = None
        results.append(f"**Video Deepfake Check**: {video_deepfake_result}")

    # Step 5: Liveness detection (first extracted frame only)
    results.append("\n## 👤 Liveness Detection")
    liveness_result = liveness_detection_tool.apply(extracted_frames[0])
    results.append(f"**Liveness Check**: {liveness_result}")

    # Step 6: Face similarity check - now always available with InsightFace
    results.append("\n## 👥 Face Matching")
    face_sim_result = face_similarity_tool.apply(extracted_frames[0], id_path)
    results.append(f"**Face Similarity**: {face_sim_result}")

    # Step 7: Final verification
    results.append("\n## 📊 Verification Summary")

    # Decisions are keyword matches against the tools' formatted strings —
    # brittle, but matches how the tools report their results.
    liveness_passed = "real" in liveness_result.lower()
    face_match_passed = "match" in face_sim_result.lower() or "true" in face_sim_result.lower()

    # Final verification decision with clear recommendation
    if liveness_passed and face_match_passed:
        results.append("✅ **KYC VERIFICATION SUCCESSFUL**")
        results.append("All verification checks have passed. **You may proceed with the KYC verification process.**")
    else:
        results.append("❌ **KYC VERIFICATION FAILED**")
        if not liveness_passed:
            results.append("- Liveness check failed: The person in the video may not be real")
        if not face_match_passed:
            results.append("- Face matching failed: The person in the video doesn't match the ID card")
        results.append("\n**RECOMMENDATION: DO NOT PROCEED WITH THIS KYC APPLICATION**")
        results.append("The verification has detected potential fraud or identity mismatch issues.")

    return "\n".join(results), annotated_video, frames[0] if frames else None
|
| 102 |
+
|
| 103 |
+
# Build the Gradio Blocks interface: uploads on the left, verification
# report plus annotated media on the right.
with gr.Blocks(title="Advanced KYC Verification System") as demo:
    gr.Markdown("# 🔐 Video KYC Verification System")
    gr.Markdown("Upload a 10-second video of yourself and your ID card photo for verification.")

    with gr.Row():
        # Input column: selfie video, ID photo, and the trigger button.
        with gr.Column():
            video_input = gr.Video(label="Upload 10-second video")
            id_card = gr.Image(label="Upload ID Card Photo", type="filepath")
            submit_btn = gr.Button("Start KYC Verification", variant="primary")

        # Output column: markdown report, annotated video, analyzed frame.
        with gr.Column():
            output_text = gr.Markdown(label="Verification Results")
            output_video = gr.Video(label="Processed Video")
            output_frame = gr.Image(label="Analyzed Frame")

    # Wire the button to the verification pipeline; outputs map 1:1 to the
    # tuple returned by process_kyc_verification.
    submit_btn.click(
        fn=process_kyc_verification,
        inputs=[video_input, id_card],
        outputs=[output_text, output_video, output_frame],
    )

# Launch the app locally for testing
if __name__ == "__main__":
    demo.launch()
|
custom_tools/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
custom_tools/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# custom_tools/__init__.py
"""Convenience re-exports for the custom KYC verification tools."""

from .video_deepfake_tool import VideoDeepfakeTool
from .liveness_detection_tool import LivenessDetectionTool
from .facesimilarity_tool import FaceSimilarityTool
from .image_description_tool import ImageDescriptionTool

# Public API of the package.
__all__ = [
    "VideoDeepfakeTool",
    "LivenessDetectionTool",
    "FaceSimilarityTool",
    "ImageDescriptionTool",
]
|
custom_tools/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (553 Bytes). View file
|
|
|
custom_tools/__pycache__/facesimilarity.cpython-311.pyc
ADDED
|
Binary file (2.19 kB). View file
|
|
|
custom_tools/__pycache__/facesimilarity_tool.cpython-311.pyc
ADDED
|
Binary file (3.73 kB). View file
|
|
|
custom_tools/__pycache__/image_description_tool.cpython-311.pyc
ADDED
|
Binary file (3.41 kB). View file
|
|
|
custom_tools/__pycache__/liveness_detection_tool.cpython-311.pyc
ADDED
|
Binary file (3.11 kB). View file
|
|
|
custom_tools/__pycache__/video_deepfake_tool.cpython-311.pyc
ADDED
|
Binary file (5.47 kB). View file
|
|
|
custom_tools/facesimilarity_tool.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
import os
|
| 3 |
+
import cv2
|
| 4 |
+
import numpy as np
|
| 5 |
+
from insightface.app import FaceAnalysis
|
| 6 |
+
|
| 7 |
+
class FaceSimilarityTool(BaseTool):
    """Compare the primary faces of two images using InsightFace embeddings."""

    default_desc = 'Uses InsightFace to evaluate face similarity between two images.'

    def __init__(self, threshold=0.5, use_gpu=False, model_name='buffalo_s'):
        """Initialize the InsightFace analysis pipeline.

        Args:
            threshold: Cosine-similarity cutoff above which two faces are
                considered a match.
            use_gpu: Run inference on GPU (ctx_id=0) instead of CPU (ctx_id=-1).
            model_name: InsightFace model-pack name (downloaded/cached on
                first use) or a path to a local model directory.
        """
        super().__init__()
        self.threshold = threshold

        # Use GPU if available (ctx_id=0) or CPU (ctx_id=-1)
        ctx_id = 0 if use_gpu else -1

        # FIX: previously hard-coded to a developer-machine absolute path
        # (/Users/.../buffalo_s), which broke on every other machine. A pack
        # *name* lets InsightFace fetch and cache the model anywhere.
        print("Initializing InsightFace model...")
        self.app = FaceAnalysis(name=model_name)
        self.app.prepare(ctx_id=ctx_id, det_size=(640, 640))
        print("✅ InsightFace model initialized successfully")

    def apply(self, img1_path: str, img2_path: str) -> str:
        """Return a formatted match/no-match verdict for the two images.

        Returns a human-readable result string; errors are reported as
        strings rather than raised, matching the other tools.
        """
        try:
            # Validate file existence before handing paths to OpenCV.
            if not os.path.exists(img1_path):
                return f"Error: Image 1 not found at path: {img1_path}"
            if not os.path.exists(img2_path):
                return f"Error: Image 2 not found at path: {img2_path}"

            img1 = cv2.imread(img1_path)
            img2 = cv2.imread(img2_path)
            if img1 is None or img2 is None:
                # (was a placeholder-less f-string)
                return "Error: Failed to load one or both images"

            # Detect faces and compute embeddings in both images.
            faces1 = self.app.get(img1)
            faces2 = self.app.get(img2)

            if len(faces1) == 0:
                return "Face similarity result: **No match** (No faces detected in first image)"
            if len(faces2) == 0:
                return "Face similarity result: **No match** (No faces detected in second image)"

            # Use the first (primary) detected face from each image.
            embedding1 = faces1[0].embedding
            embedding2 = faces2[0].embedding

            # Cosine similarity between the two embedding vectors.
            similarity = np.dot(embedding1, embedding2) / (
                np.linalg.norm(embedding1) * np.linalg.norm(embedding2)
            )

            if similarity > self.threshold:
                result = "Match detected"
                match_text = "True"
            else:
                result = "No match"
                match_text = "False"

            return f"Face similarity result: **{match_text}** ({result}, similarity score: {similarity:.4f})"

        except Exception as e:
            return f"Error during face similarity computation: {str(e)}"
|
custom_tools/image_description_tool.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
class ImageDescriptionTool(BaseTool):
    """Generates a one-line caption for an image with a ViT-GPT2 model."""

    default_desc = 'Uses a pretrained VIT-GPT2 model to generate descriptions for images.'

    def __init__(self):
        """Load the captioning model, processor, and tokenizer once."""
        super().__init__()
        # Import lazily so merely importing this module stays cheap.
        from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

        checkpoint = "nlpconnect/vit-gpt2-image-captioning"
        self.model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
        self.feature_extractor = ViTImageProcessor.from_pretrained(checkpoint)
        self.tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        # Pick the best available device and fix generation settings.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.max_length = 16
        self.num_beams = 4
        self.gen_kwargs = {"max_length": self.max_length, "num_beams": self.num_beams}

    def apply(self, image_path: str) -> str:
        """Return a formatted caption for the image at ``image_path``.

        Errors are reported as a string instead of being raised.
        """
        try:
            # Load and normalize the image to RGB for the ViT processor.
            img = Image.open(image_path)
            if img.mode != "RGB":
                img = img.convert(mode="RGB")

            # Preprocess into model-ready pixel tensors on the right device.
            pixels = self.feature_extractor(images=[img], return_tensors="pt").pixel_values
            pixels = pixels.to(self.device)

            # Beam-search caption generation; no gradients needed.
            with torch.no_grad():
                token_ids = self.model.generate(pixels, **self.gen_kwargs)

            caption = self.tokenizer.decode(token_ids[0], skip_special_tokens=True).strip()

            return f"Description: **{caption}** (generated with VIT-GPT2 model)"

        except Exception as e:
            return f"Error during image description: {str(e)}"
|
custom_tools/liveness_detection_tool.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import torch
|
| 4 |
+
import tempfile
|
| 5 |
+
import cv2
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
class LivenessDetectionTool(BaseTool):
    """Classifies a face image as live or spoofed with a DinoV2 classifier."""

    default_desc = 'Detects liveness in an image using a DinoV2 image classification model.'

    def __init__(self):
        """Load the liveness-detection processor and model once."""
        super().__init__()
        # Import lazily so importing this module does not pull in transformers.
        from transformers import AutoImageProcessor, AutoModelForImageClassification

        checkpoint = "nguyenkhoa/dinov2_Liveness_detection_v2.2.3"
        self.processor = AutoImageProcessor.from_pretrained(checkpoint)
        self.model = AutoModelForImageClassification.from_pretrained(checkpoint)

    def apply(self, image_path: str) -> str:
        """Return a formatted liveness verdict for the image at ``image_path``.

        Errors are reported as a string instead of being raised.
        """
        try:
            img = Image.open(image_path).convert("RGB")

            # Run a single forward pass; no gradients needed for inference.
            model_inputs = self.processor(images=img, return_tensors="pt")
            with torch.no_grad():
                model_outputs = self.model(**model_inputs)

            # Convert logits to class probabilities for the single image.
            probabilities = torch.nn.functional.softmax(model_outputs.logits, dim=-1)[0]

            # Take the top class and report its label with confidence.
            top_idx = torch.argmax(probabilities).item()
            label = self.model.config.id2label[top_idx]
            score = round(probabilities[top_idx].item(), 4)

            return f"Liveness: {label} (Confidence: {score})"

        except Exception as e:
            return f"Error during liveness detection: {str(e)}"
|
custom_tools/video_deepfake_tool.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agentlego.tools import BaseTool
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import cv2
|
| 4 |
+
import torch
|
| 5 |
+
import tempfile
|
| 6 |
+
|
| 7 |
+
class VideoDeepfakeTool(BaseTool):
    """Annotates each detected face in a video with a real/fake prediction."""

    default_desc = 'Detects deepfakes in a video using a Hugging Face image classifier.'

    def __init__(self):
        """Load the image classifier and the Haar face detector once."""
        super().__init__()
        # Import lazily so importing this module stays cheap.
        from transformers import AutoImageProcessor, AutoModelForImageClassification

        # Load Hugging Face model and processor
        self.processor = AutoImageProcessor.from_pretrained("Smogy/SMOGY-Ai-images-detector")
        self.model = AutoModelForImageClassification.from_pretrained("Smogy/SMOGY-Ai-images-detector")

        # Load Haar cascade for face detection
        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    def apply(self, video_path: str) -> str:
        """Process a video frame-by-frame, drawing a labeled box per face.

        Returns:
            Path to the annotated output video on success, or an error
            message string on failure (callers must check which they got).
        """
        cap = None
        out = None
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError("Could not open video file")

            # Mirror the input's geometry/frame rate in the output writer.
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # NamedTemporaryFile only reserves the path; VideoWriter fills it.
            with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
                output_path = temp_file.name

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            # Process each frame
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Haar cascades need grayscale input for detection.
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = self.face_cascade.detectMultiScale(gray, 1.1, 4)

                # Classify and annotate every detected face region.
                for (x, y, w, h) in faces:
                    face_img = frame[y:y+h, x:x+w]
                    face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
                    inputs = self.processor(images=face_pil, return_tensors="pt")

                    with torch.no_grad():
                        outputs = self.model(**inputs)

                    probs = torch.softmax(outputs.logits, dim=1)
                    idx = probs.argmax().item()
                    label = self.model.config.id2label[idx]
                    conf = probs[0, idx].item()

                    # Green box for 'real', red otherwise.
                    # NOTE(review): assumes the model's id2label uses the
                    # lowercase string 'real' — confirm against the model card.
                    color = (0, 255, 0) if label == 'real' else (0, 0, 255)
                    cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
                    cv2.putText(frame, f"{label} {conf:.2f}", (x, y-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2, cv2.LINE_AA)

                out.write(frame)

            return output_path

        except Exception as e:
            return f"Error during video deepfake detection: {str(e)}"
        finally:
            # FIX: the original only released cap/out on the success path,
            # leaking both handles whenever an exception fired mid-loop.
            if cap is not None:
                cap.release()
            if out is not None:
                out.release()
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
torch>=1.10.0
|
| 3 |
+
transformers>=4.30.0
|
| 4 |
+
Pillow>=9.0.0
|
| 5 |
+
opencv-python>=4.5.0
|
| 6 |
+
numpy>=1.20.0
|
| 7 |
+
gradio-client
agentlego
insightface>=0.7
onnxruntime
|