Runtime error
Update app.py
app.py
CHANGED
Old version (removed lines marked with -; stretches this diff view does not preserve appear as -...):

@@ -1,10 +1,14 @@
 """
-... (six-line module docstring removed; its text is not recoverable from this view)
 """

 import gradio as gr
@@ -13,337 +17,372 @@ import torch.nn as nn
 from torchvision import transforms
 from PIL import Image
 import timm
-import json
-from huggingface_hub import hf_hub_download
 import numpy as np
-...
-REPO_ID = "ash12321/deepfake-detector-v13-optimized"
-MODEL_NUM = 1  # Using Model 1 (ConvNeXt - most reliable, 99.90% test F1)
-...
-# ───────────────────────────────────────────────────────────────────────
-# MODEL DEFINITION
-# ───────────────────────────────────────────────────────────────────────
-class ...          (class name not recoverable)
-    def __init__(self, ...
         super().__init__()
-        self. ...
-...
-            nn.Linear(feat_dim, hidden_dim),
-            nn.BatchNorm1d(hidden_dim),
-            nn.GELU(),
-            nn.Dropout(dropout),
-            nn.Linear(hidden_dim, hidden_dim // 4),
-            nn.BatchNorm1d(hidden_dim // 4),
-            nn.GELU(),
-            nn.Dropout(dropout * 0.5),
-            nn.Linear(hidden_dim // 4, 1)
-        )
     else:
-...
     def forward(self, x):
-...
-        )
-...
-    # Load parameters
-    with open(params_path, 'r') as f:
-        best_params = json.load(f)
-    params = best_params['params']
-    threshold = params['classification_threshold']
-    print(f"✓ Using Model {MODEL_NUM}")
-    print(f"✓ Threshold: {threshold:.4f}")
-    print(f"✓ Test F1 Score: {best_params.get('f1_score', 'N/A')}")
-    # Model architecture map
-    backbone_map = {
-        1: 'convnext_large',
-        2: 'vit_large_patch16_224',
-        3: 'swin_large_patch4_window7_224'
-    }
-...
-    # Load weights
-    checkpoint = torch.load(model_path, map_location=device)
-    model.load_state_dict(checkpoint['model_state_dict'])
-    model = model.to(device)
-    model.eval()
-...
-# ───────────────────────────────────────────────────────────────────────
-# IMAGE PREPROCESSING
-# ───────────────────────────────────────────────────────────────────────
-    transforms. ...
-...
-# ───────────────────────────────────────────────────────────────────────
-# PREDICTION FUNCTION
-# ───────────────────────────────────────────────────────────────────────
-...
     """
-    Predict if an image is ...
     Args:
         image: PIL Image
-        ...
     Returns:
-        ...
     """
-...
-            "Error": "Please upload an image"
-        }
-...
-    # Calculate confidence
-    if is_fake:
-        confidence = probability * 100
-        label = "🚨 FAKE / AI-GENERATED"
-        color = "red"
-    else:
-        confidence = (1 - probability) * 100
-        label = "✅ REAL"
-        color = "green"
-    # Create result dictionary for Gradio
-    result = {
-        "Prediction": label,
-        "Confidence": f"{confidence:.2f}%",
-        "Raw Score": f"{probability:.4f}",
-        "Threshold Used": f"{thresh:.4f}"
-    }
-    # Additional context
-    if confidence > 95:
-        certainty = "Very High Certainty"
-    elif confidence > 85:
-        certainty = "High Certainty"
-    elif confidence > 70:
-        certainty = "Moderate Certainty"
-    else:
-        certainty = "Low Certainty - Manual Review Recommended"
-    result["Certainty Level"] = certainty
-    return result
-...
-    gr. ...
-        """
-        # 🔍 Deepfake Detector
-        ** ...
-        """
-...
-            image_input = gr.Image(
-                type="pil",
-                label="Upload Image",
-                height=400
-            )
-            threshold_slider = gr.Slider(
-                minimum=0.1,
-                maximum=0.9,
-                value=threshold,
-                step=0.05,
-                label="Detection Threshold (Lower = More Sensitive to Fakes)",
-                info="Adjust if getting too many false positives/negatives"
-            )
-            predict_btn = gr.Button(
-                "🔍 Analyze Image",
-                variant="primary",
-                size="lg"
-            )
-            gr.Markdown(
-                """
-                ### 💡 Tips:
-                - Upload clear images with visible faces
-                - Works best with portraits and headshots
-                - Supports: JPG, PNG, WebP
-                - **Adjust threshold if results seem off**
-                """
-            )
-        with gr.Column():
-            result_output = gr.JSON(
-                label="Detection Results"
-            )
-            gr.Markdown(
-                """
-                ### 📊 Understanding Results:
-                **Prediction:** REAL or FAKE classification
-...
-                - Above threshold → FAKE
-                - Below threshold → REAL
-...
-    gr.Markdown(
-        """
         ---
-        ** ...
-        """
-    )
-# ───────────────────────────────────────────────────────────────────────
-# LAUNCH
-# ───────────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
-...
-    demo.launch(
-        share=True,  # Creates public link
-        server_name="0.0.0.0",
-        server_port=7860
-    )
New version of app.py (added lines marked with +; unchanged context lines 15-16, collapsed by the viewer, are restored from the hunk header and the torch usage below):

 """
+Gradio App for One-Class Deepfake Detector
+This app loads the DeepSVDD model from HuggingFace and provides an interface to test images.
+
+Create a new Space on HuggingFace:
+1. Go to https://huggingface.co/spaces
+2. Click "Create new Space"
+3. Name it (e.g., "deepfake-detector-demo")
+4. Select SDK: Gradio
+5. Create the Space
+6. Upload this file as "app.py"
 """

 import gradio as gr
 import torch
 import torch.nn as nn
 from torchvision import transforms
 from PIL import Image
 import timm
 import numpy as np
+from huggingface_hub import hf_hub_download
+import json
+import warnings
+warnings.filterwarnings('ignore')
+# ==================== MODEL ARCHITECTURE ====================
+# Copy the same architecture classes from your training script

+class FrequencyFeatureExtractor:
+    @staticmethod
+    def extract_fft_features(image_tensor, n_features=64):
+        if image_tensor.shape[0] == 3:
+            gray = 0.299 * image_tensor[0] + 0.587 * image_tensor[1] + 0.114 * image_tensor[2]
+        else:
+            gray = image_tensor[0]
+
+        gray_np = gray.cpu().numpy()
+        fft = np.fft.fft2(gray_np)
+        fft_shift = np.fft.fftshift(fft)
+        magnitude = np.abs(fft_shift)
+
+        h, w = magnitude.shape
+        center_h, center_w = h // 2, w // 2
+
+        features = []
+        max_radius = min(center_h, center_w)
+        n_bins = int(np.sqrt(n_features))
+
+        for i in range(n_bins):
+            r_inner = int(i * max_radius / n_bins)
+            r_outer = int((i + 1) * max_radius / n_bins)
+
+            y, x = np.ogrid[-center_h:h-center_h, -center_w:w-center_w]
+            mask = (x*x + y*y >= r_inner*r_inner) & (x*x + y*y < r_outer*r_outer)
+
+            ring_values = magnitude[mask]
+            if len(ring_values) > 0:
+                features.extend([np.mean(ring_values), np.std(ring_values)])
+            else:
+                features.extend([0.0, 0.0])
+
+        features = features[:n_features]
+        if len(features) < n_features:
+            features.extend([0.0] * (n_features - len(features)))
+
+        return torch.tensor(features, dtype=torch.float32)
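[Note] With n_features=64 there are int(sqrt(64)) = 8 concentric rings, and each ring contributes a (mean, std) pair, so only 16 of the 64 slots carry signal before zero-padding. A quick standalone check (assumes the class above is in scope, with torch and numpy imported as in this file):

    import torch
    feats = FrequencyFeatureExtractor.extract_fft_features(torch.rand(3, 224, 224))
    print(feats.shape)              # torch.Size([64])
    print(int((feats != 0).sum()))  # typically 16: 8 rings x (mean, std); the rest is padding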
+class CNNEncoder(nn.Module):
+    def __init__(self, channels=[64, 128, 256, 512], output_dim=256, image_size=224):
+        super().__init__()
+
+        layers = []
+        in_channels = 3
+
+        for out_channels in channels:
+            layers.extend([
+                nn.Conv2d(in_channels, out_channels, 3, padding=1),
+                nn.BatchNorm2d(out_channels),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(out_channels, out_channels, 3, padding=1),
+                nn.BatchNorm2d(out_channels),
+                nn.ReLU(inplace=True),
+                nn.MaxPool2d(2, 2)
+            ])
+            in_channels = out_channels
+
+        self.conv_layers = nn.Sequential(*layers)
+        self.feature_size = channels[-1] * (image_size // (2 ** len(channels))) ** 2
+
+        self.fc = nn.Sequential(
+            nn.Linear(self.feature_size, 1024),
+            nn.ReLU(inplace=True),
+            nn.Dropout(0.3),
+            nn.Linear(1024, output_dim)
+        )
+
+    def forward(self, x):
+        x = self.conv_layers(x)
+        x = x.view(x.size(0), -1)
+        x = self.fc(x)
+        return x
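[Note] With the defaults, four stride-2 poolings take 224 down to 14, so feature_size = 512 * 14 * 14 = 100352, and the first fc layer alone holds 100352 * 1024, roughly 103M weights; worth knowing when budgeting Space memory. Shape check (assumes the class above is in scope):

    import torch
    enc = CNNEncoder()
    print(enc.feature_size)                       # 100352
    print(enc(torch.rand(2, 3, 224, 224)).shape)  # torch.Size([2, 256])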
+class HybridEncoder(nn.Module):
+    def __init__(self, cnn_channels=[64, 128, 256, 512], vit_model="vit_small_patch16_224",
+                 embedding_dim=512, use_frequency=True, image_size=224):
         super().__init__()

+        self.use_frequency = use_frequency
+        self.cnn_encoder = CNNEncoder(channels=cnn_channels, output_dim=256, image_size=image_size)

+        self.vit = timm.create_model(vit_model, pretrained=False, num_classes=0)
+        vit_dim = self.vit.num_features
+        self.vit_projection = nn.Linear(vit_dim, 256)
+
+        if self.use_frequency:
+            self.freq_dim = 64
+            self.freq_projection = nn.Linear(self.freq_dim, 128)
+            fusion_dim = 256 + 256 + 128
         else:
+            fusion_dim = 256 + 256
+
+        self.fusion = nn.Sequential(
+            nn.Linear(fusion_dim, 512),
+            nn.ReLU(inplace=True),
+            nn.Dropout(0.3),
+            nn.Linear(512, embedding_dim),
+            nn.BatchNorm1d(embedding_dim)
+        )
+
+        self.freq_extractor = FrequencyFeatureExtractor()
+
     def forward(self, x):
+        batch_size = x.size(0)
+
+        cnn_features = self.cnn_encoder(x)
+        vit_features = self.vit(x)
+        vit_features = self.vit_projection(vit_features)
+
+        if self.use_frequency:
+            freq_features = []
+            for i in range(batch_size):
+                freq_feat = self.freq_extractor.extract_fft_features(x[i], self.freq_dim)
+                freq_features.append(freq_feat)
+            freq_features = torch.stack(freq_features).to(x.device)
+            freq_features = self.freq_projection(freq_features)
+            combined = torch.cat([cnn_features, vit_features, freq_features], dim=1)
+        else:
+            combined = torch.cat([cnn_features, vit_features], dim=1)
+
+        embeddings = self.fusion(combined)
+        return embeddings
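[Note] The fused vector is 256 (CNN) + 256 (projected ViT) + 128 (projected FFT) = 640-dim before the fusion MLP maps it to 512. One caveat: the FFT features are computed per-sample in a Python/numpy loop, so that branch runs on CPU whatever device the rest of the model sits on. Shape check (random weights; pretrained=False means timm downloads nothing):

    import torch
    enc = HybridEncoder()                          # assumes the classes above are in scope
    print(enc(torch.rand(2, 3, 224, 224)).shape)   # torch.Size([2, 512])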
+class DeepSVDD(nn.Module):
+    def __init__(self, embedding_dim=512, cnn_channels=[64, 128, 256, 512],
+                 vit_model="vit_small_patch16_224", use_frequency=True, image_size=224):
+        super().__init__()
+
+        self.encoder = HybridEncoder(
+            cnn_channels=cnn_channels,
+            vit_model=vit_model,
+            embedding_dim=embedding_dim,
+            use_frequency=use_frequency,
+            image_size=image_size
+        )
+        self.embedding_dim = embedding_dim
+
+        self.register_buffer('center', torch.zeros(embedding_dim))
+        self.radius = nn.Parameter(torch.tensor(0.0), requires_grad=False)
+
+    def forward(self, x):
+        embeddings = self.encoder(x)
+        return embeddings
+
+    def get_distance(self, embeddings):
+        return torch.sum((embeddings - self.center) ** 2, dim=1)
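[Note] get_distance returns the squared Euclidean distance to the hypersphere center, the standard DeepSVDD anomaly score. Registering the center as a buffer and the radius as a frozen parameter means both travel with state_dict() and .to(device):

    m = DeepSVDD()                  # assumes the classes above are in scope
    print(m.radius.requires_grad)   # False: fixed at load time, never trained in this app
    print(m.center.shape)           # torch.Size([512])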
+# ==================== MODEL LOADING ====================
+@torch.no_grad()
+def load_model_from_hf(repo_id="ash12321/deepsvdd-model"):
+    """Load the DeepSVDD model from HuggingFace"""
+    print("Loading model from HuggingFace...")
+
+    # Download files
+    model_path = hf_hub_download(repo_id=repo_id, filename="deepsvdd_model.pth")
+    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+
+    # Load config
+    with open(config_path, 'r') as f:
+        config = json.load(f)
+
+    # Initialize model
+    model = DeepSVDD(
+        embedding_dim=config.get('EMBEDDING_DIM', 512),
+        cnn_channels=config.get('CNN_CHANNELS', [64, 128, 256, 512]),
+        vit_model=config.get('VIT_MODEL', 'vit_small_patch16_224'),
+        use_frequency=config.get('USE_FREQUENCY_FEATURES', True),
+        image_size=config.get('IMAGE_SIZE', 224)
+    )
+
+    # Load weights
+    checkpoint = torch.load(model_path, map_location='cpu')
+    model.load_state_dict(checkpoint['model_state_dict'])
+    model.center = checkpoint['center']
+    model.radius = checkpoint['radius']
+
+    model.eval()
+    print(f"✓ Model loaded successfully!")
+    print(f"  Hypersphere radius: {model.radius.item():.4f}")
+    print(f"  Center norm: {model.center.norm().item():.4f}")
+
+    return model, config
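[Note] The loader assumes the checkpoint dict was saved with exactly the keys 'model_state_dict', 'center', and 'radius'. Because model.radius is an nn.Parameter, the assignment model.radius = checkpoint['radius'] only works if the saved value is itself a Parameter; assigning a plain tensor to a parameter attribute raises TypeError in PyTorch. A matching save-side sketch (hypothetical; the training script is not part of this commit):

    torch.save({
        'model_state_dict': model.state_dict(),
        'center': model.center,   # buffer tensor, shape [embedding_dim]
        'radius': model.radius,   # nn.Parameter scalar; torch.save preserves the Parameter type
    }, 'deepsvdd_model.pth')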
+# ==================== IMAGE PREPROCESSING ====================
+def preprocess_image(image):
+    """Preprocess PIL Image for model input"""
+    transform = transforms.Compose([
+        transforms.Resize((224, 224)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    ])
+
+    # Convert to RGB if needed
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+
+    image_tensor = transform(image).unsqueeze(0)
+    return image_tensor
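[Note] Standard ImageNet normalization, presumably matching the training pipeline (which this commit does not include). Round-trip check (Image is Pillow, imported above):

    img = Image.new('RGB', (640, 480), (128, 128, 128))
    print(preprocess_image(img).shape)  # torch.Size([1, 3, 224, 224])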
+# ==================== PREDICTION FUNCTION ====================
+def predict_deepfake(image, threshold_multiplier=1.5):
     """
+    Predict if an image is a deepfake
+
     Args:
         image: PIL Image
+        threshold_multiplier: How many times the radius to use as threshold
+
     Returns:
+        prediction, confidence_text, details
     """
+    # Preprocess
+    image_tensor = preprocess_image(image)
+
+    # Get embedding
+    with torch.no_grad():
+        embedding = model(image_tensor)
+        distance = model.get_distance(embedding).item()
+
+    # Calculate threshold
+    radius = model.radius.item()
+    threshold = radius * threshold_multiplier
+
+    # Make prediction
+    is_fake = distance > threshold
+
+    # Calculate confidence score (0-100%)
+    # Distance closer to center = more confident it's real
+    # Distance far from center = more confident it's fake
+    if is_fake:
+        # How far beyond threshold (0 = at threshold, 1+ = far beyond)
+        confidence = min(100, (distance - threshold) / threshold * 100)
+    else:
+        # How close to center (0 = at threshold, 100 = at center)
+        confidence = min(100, (1 - distance / threshold) * 100)
+
+    # Create result dictionary
+    prediction = "🚨 LIKELY FAKE" if is_fake else "✅ LIKELY REAL"
+
+    details = f"""
+    **Hypersphere Distance:** {distance:.4f}
+    **Detection Threshold:** {threshold:.4f}
+    **Hypersphere Radius:** {radius:.4f}
+
+    **How it works:**
+    - Real images cluster tightly in embedding space (small distance)
+    - Fake images fall outside this cluster (large distance)
+    - This model was trained ONLY on real images using one-class learning
+    """
+
+    # Format the confidence as display text
+    confidence_text = f"{confidence:.1f}% Confidence"
+
+    return prediction, confidence_text, details
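[Note] The confidence mapping is linear in distance relative to the threshold, clamped at 100. Worked numbers (illustrative only, not from the checkpoint):

    radius, mult = 10.0, 1.5       # hypothetical radius; default multiplier
    threshold = radius * mult      # 15.0
    for d in (22.5, 3.0):
        if d > threshold:
            print("fake", min(100, (d - threshold) / threshold * 100))  # fake 50.0
        else:
            print("real", min(100, (1 - d / threshold) * 100))          # real 80.0

One thing to verify against the training code: get_distance returns a squared distance, while radius is compared to it directly; whether those are in the same units depends on how the radius was computed at training time, which this commit does not show.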
+# ==================== LOAD MODEL ====================
+print("Initializing Deepfake Detector...")
+model, config = load_model_from_hf("ash12321/deepsvdd-model")
+print("✓ Ready!")
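[Note] Spaces execute app.py top to bottom at startup, so the Hub download and model build happen before any UI exists; if the repo "ash12321/deepsvdd-model" is missing, private, or lacks these filenames, the app dies right here, which is one plausible source of the "Runtime error" badge at the top of this page. A defensive variant (a sketch, not the committed code):

    try:
        model, config = load_model_from_hf("ash12321/deepsvdd-model")
    except Exception as e:
        model, config = None, None
        print(f"Model load failed: {e}")  # predict_deepfake would then need a None guard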
+# ==================== GRADIO INTERFACE ====================
+def create_interface():
+    """Create Gradio interface"""

+    with gr.Blocks(title="One-Class Deepfake Detector", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # 🔍 One-Class Deepfake Detector

+        This AI model detects deepfakes using **hypersphere-based anomaly detection** (DeepSVDD).
+        It was trained **exclusively on real images** and learns what "real" looks like in embedding space.

+        ### How it works:
+        1. Upload an image (photo, portrait, scene, etc.)
+        2. The model computes how far the image is from the "real image hypersphere"
+        3. Images far from the center are flagged as potential deepfakes

+        **Note:** This is a research model. Adjust the threshold slider to control sensitivity.
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                image_input = gr.Image(type="pil", label="Upload Image to Test")

+                threshold_slider = gr.Slider(
+                    minimum=1.0,
+                    maximum=3.0,
+                    value=1.5,
+                    step=0.1,
+                    label="Detection Threshold Multiplier",
+                    info="Higher = stricter (fewer false positives, more false negatives)"
+                )

+                submit_btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")

+                gr.Markdown("""
+                ### 💡 Tips:
+                - Works best on faces, portraits, and natural scenes
+                - Higher threshold = more conservative (flags only obvious fakes)
+                - Lower threshold = more aggressive (flags anything unusual)
+                - Default (1.5x) is a good starting point
+                """)
+
+            with gr.Column(scale=1):
+                prediction_output = gr.Textbox(label="Prediction", lines=2)
+                confidence_output = gr.Textbox(label="Confidence Score", lines=1)
+                details_output = gr.Markdown(label="Technical Details")
+
+        # Examples
+        gr.Markdown("### 📸 Try Example Images:")
+        gr.Examples(
+            examples=[
+                ["examples/real1.jpg", 1.5],
+                ["examples/real2.jpg", 1.5],
+                ["examples/fake1.jpg", 1.5],
+            ],
+            inputs=[image_input, threshold_slider],
+            label="Example Images"
+        )
+
+        # Connect button
+        submit_btn.click(
+            fn=predict_deepfake,
+            inputs=[image_input, threshold_slider],
+            outputs=[prediction_output, confidence_output, details_output]
+        )
+
+        gr.Markdown("""
         ---
+        ### 🧠 About the Model
+
+        **Architecture:** Hybrid CNN + Vision Transformer + FFT Frequency Features
+
+        **Training:** Trained on 50,000+ real images using DeepSVDD (Deep Support Vector Data Description)
+
+        **Method:** One-class learning - learns the distribution of real images only
+
+        **Novelty:** Unlike binary classifiers, this model doesn't learn specific fake patterns.
+        It learns what's "normal" and flags anything anomalous, making it more robust to new deepfake methods.
+
+        ---
+        **Model by:** [ash12321](https://huggingface.co/ash12321) |
+        **Source Code:** [GitHub](https://github.com/ash12321/deepfake-detector)
+        """)
+
+    return demo
+# ==================== LAUNCH ====================
 if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()
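[Note] For the Space to build, a requirements.txt would have to sit next to app.py covering the imports above; a plausible minimal set (an assumption, not part of this commit):

    gradio
    torch
    torchvision
    timm
    huggingface_hub
    numpy
    Pillow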