Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

handler.py +81 -0
models.py +197 -0
requirements.txt +5 -0
utils.py +37 -0

handler.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import Dict, List, Any
+from io import BytesIO
+from PIL import Image
+import torch
+import base64
+import numpy as np
+import cv2
+import albumentations as A
+from albumentations.pytorch import ToTensorV2
+from safetensors.torch import load_file
+# Import your model definition
+from models import DeepfakeDetector
+class EndpointHandler:
+    def __init__(self, path="."):
+        # Load model definition
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.device = device
+        self.model = DeepfakeDetector(pretrained=False) # Architecture only
+        # Load weights
+        try:
+            # Try loading safetensors
+            state_dict = load_file(f"{path}/best_model.safetensors")
+            self.model.load_state_dict(state_dict, strict=False)
+        except Exception as e:
+            print(f"Error loading weights: {e}")
+            # Fallback path if necessary
+            state_dict = load_file("best_model.safetensors")
+            self.model.load_state_dict(state_dict, strict=False)
+        self.model.to(device)
+        self.model.eval()
+        # Define transform
+        self.transform = A.Compose([
+            A.Resize(224, 224),
+            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+            ToTensorV2(),
+        ])
+    def __call__(self, data: Any) -> List[Dict[str, Any]]:
+        inputs = data.pop("inputs", data)
+        # Decode image
+        image = None
+        if isinstance(inputs, Image.Image):
+            image = inputs
+        elif isinstance(inputs, str):
+            # Try base64
+            try:
+                if "base64," in inputs:
+                    inputs = inputs.split("base64,")[1]
+                image_bytes = base64.b64decode(inputs)
+                image = Image.open(BytesIO(image_bytes))
+            except:
+                # Url?
+                pass
+        elif isinstance(inputs, bytes):
+            image = Image.open(BytesIO(inputs))
+        if image is None:
+             return [{"error": "Invalid input format"}]
+        image = image.convert("RGB")
+        image_np = np.array(image)
+        # Augmentations expect numpy array
+        augmented = self.transform(image=image_np)
+        image_tensor = augmented['image'].unsqueeze(0).to(self.device)
+        # Inference
+        with torch.no_grad():
+            output = self.model(image_tensor)
+            prob = torch.sigmoid(output).item()
+        label = "FAKE" if prob > 0.5 else "REAL"
+        score = prob if prob > 0.5 else 1 - prob
+        return [{"label": label, "score": score}]

models.py ADDED Viewed

	@@ -0,0 +1,197 @@

+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.models as models
+try:
+    from src.utils import get_fft_feature
+except ImportError:
+    # Flat layout: utils.py sits next to models.py instead of inside src/.
+    from utils import get_fft_feature
+class RGBBranch(nn.Module):
+    def __init__(self, pretrained=True):
+        super().__init__()
+        # EfficientNet V2 Small: Robust and efficient spatial features
+        weights = models.EfficientNet_V2_S_Weights.DEFAULT if pretrained else None
+        self.net = models.efficientnet_v2_s(weights=weights)
+        # Extract features before classification head
+        self.features = self.net.features
+        self.avgpool = self.net.avgpool
+        self.out_dim = 1280
+    def forward(self, x):
+        x = self.features(x)
+        x = self.avgpool(x)
+        x = torch.flatten(x, 1)
+        return x
+class FreqBranch(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # Simple CNN to analyze frequency domain patterns
+        self.net = nn.Sequential(
+            nn.Conv2d(3, 32, kernel_size=3, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            nn.MaxPool2d(2),
+            nn.Conv2d(32, 64, kernel_size=3, padding=1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.MaxPool2d(2),
+            nn.Conv2d(64, 128, kernel_size=3, padding=1),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.AdaptiveAvgPool2d((1,1))
+        )
+        self.out_dim = 128
+    def forward(self, x):
+        return torch.flatten(self.net(x), 1)
+class PatchBranch(nn.Module):
+    def __init__(self):
+        super().__init__()
+        # Analyzes local patches for inconsistencies
+        # Shared lightweight CNN for each patch
+        self.patch_encoder = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(2), # 64 -> 32
+            nn.Conv2d(16, 32, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(2), # 32 -> 16
+            nn.Conv2d(32, 64, kernel_size=3, padding=1),
+            nn.ReLU(),
+            nn.AdaptiveAvgPool2d((1,1))
+        )
+        self.out_dim = 64
+    def forward(self, x):
+        # x: (B, 3, 256, 256)
+        # Create 4x4=16 patches of size 64x64
+        # Unfold logic: kernel_size=64, stride=64
+        patches = x.unfold(2, 64, 64).unfold(3, 64, 64)
+        # patches shape: (B, 3, 4, 4, 64, 64)
+        B, C, H_grid, W_grid, H_patch, W_patch = patches.shape
+        # Merge batch and grid dimensions for parallel processing
+        patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
+        patches = patches.view(B * H_grid * W_grid, C, H_patch, W_patch)
+        # Encode
+        feats = self.patch_encoder(patches) # (B*16, 64, 1, 1)
+        feats = torch.flatten(feats, 1) # (B*16, 64)
+        # Aggregate back to B
+        feats = feats.view(B, H_grid * W_grid, -1) # (B, 16, 64)
+        # Max pool over patches to capture the "most fake" patch signal
+        feats_max, _ = torch.max(feats, dim=1) # (B, 64)
+        return feats_max
+class ViTBranch(nn.Module):
+    def __init__(self, pretrained=True):
+        super().__init__()
+        # Swin Transformer Tiny: Capture long-range dependencies
+        weights = models.Swin_V2_T_Weights.DEFAULT if pretrained else None
+        self.net = models.swin_v2_t(weights=weights)
+        # Replace head with Identity to get features
+        self.out_dim = self.net.head.in_features
+        self.net.head = nn.Identity()
+    def forward(self, x):
+        return self.net(x)
+class DeepfakeDetector(nn.Module):
+    def __init__(self, pretrained=True):
+        super().__init__()
+        self.rgb_branch = RGBBranch(pretrained)
+        self.freq_branch = FreqBranch()
+        self.patch_branch = PatchBranch()
+        self.vit_branch = ViTBranch(pretrained)
+        input_dim = (self.rgb_branch.out_dim +
+                     self.freq_branch.out_dim +
+                     self.patch_branch.out_dim +
+                     self.vit_branch.out_dim)
+        # Confidence-based fusion head
+        self.classifier = nn.Sequential(
+            nn.Linear(input_dim, 512),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.Dropout(0.5),
+            nn.Linear(512, 1)
+        )
+    def forward(self, x):
+        # 1. Spatial Analysis
+        rgb_feat = self.rgb_branch(x)
+        # 2. Frequency Analysis
+        freq_img = get_fft_feature(x)
+        freq_feat = self.freq_branch(freq_img)
+        # 3. Patch Analysis (Local Inconsistencies)
+        patch_feat = self.patch_branch(x)
+        # 4. Global Consistency (ViT)
+        vit_feat = self.vit_branch(x)
+        # 5. Feature Fusion
+        combined = torch.cat([rgb_feat, freq_feat, patch_feat, vit_feat], dim=1)
+        return self.classifier(combined)
+    def get_heatmap(self, x):
+        """Generate Grad-CAM heatmap for the input image"""
+        # We'll use the RGB branch for visualization as it contains spatial features
+        # Enable gradients for the input if needed, though typically we hook into layers
+        # 1. Forward pass through RGB branch
+        # We need to register a hook on the last conv layer of the efficientnet features
+        # Target layer: self.rgb_branch.features[-1] (the last block)
+        gradients = []
+        activations = []
+        def backward_hook(module, grad_input, grad_output):
+            gradients.append(grad_output[0])
+        def forward_hook(module, input, output):
+            activations.append(output)
+        # Register hooks on the last convolutional layer of RGB branch
+        target_layer = self.rgb_branch.features[-1]
+        hook_b = target_layer.register_full_backward_hook(backward_hook)
+        hook_f = target_layer.register_forward_hook(forward_hook)
+        # Forward pass
+        logits = self(x)
+        pred_idx = 0 # Binary classification, output is scalar logic
+        # Backward pass
+        self.zero_grad()
+        logits.backward(retain_graph=True)
+        # Get gradients and activations
+        pooled_gradients = torch.mean(gradients[0], dim=[0, 2, 3])
+        activation = activations[0][0]
+        # Weight activations by gradients (Grad-CAM)
+        for i in range(activation.shape[0]):
+            activation[i, :, :] *= pooled_gradients[i]
+        heatmap = torch.mean(activation, dim=0).cpu().detach().numpy()
+        heatmap = np.maximum(heatmap, 0) # ReLU
+        # Normalize
+        if np.max(heatmap) != 0:
+            heatmap /= np.max(heatmap)
+        # Remove hooks
+        hook_b.remove()
+        hook_f.remove()
+        return heatmap

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch
+torchvision
+albumentations
+safetensors
+opencv-python-headless

utils.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import torch
+import numpy as np
+import cv2
+def get_fft_feature(x):
+    """
+    Computes the Log-Magnitude Spectrum of the input images.
+    Args:
+        x (torch.Tensor): Input images of shape (B, C, H, W)
+    Returns:
+        torch.Tensor: Log-magnitude spectrum of shape (B, C, H, W)
+    """
+    if x.dim() == 3:
+        x = x.unsqueeze(0)
+    # Compute 2D FFT
+    fft = torch.fft.fft2(x, norm='ortho')
+    # Compute magnitude
+    mag = torch.abs(fft)
+    # Apply log scale (add epsilon for stability)
+    mag = torch.log(mag + 1e-6)
+    # Shift zero-frequency component to the center of the spectrum
+    mag = torch.fft.fftshift(mag, dim=(-2, -1))
+    return mag
+def min_max_normalize(tensor):
+    """
+    Min-max normalization for visualization or stable training provided tensor.
+    """
+    min_val = tensor.min()
+    max_val = tensor.max()
+    return (tensor - min_val) / (max_val - min_val + 1e-8)