Spaces: Runtime error

Commit 3248dd8 · Initial deployment with trained models
Browse files:
- .gitattributes (+1 -0)
- README.md (+118 -0)
- app.py (+266 -0)
- app_gradio_ui.py (+528 -0)
- meta_model.pkl (+0 -0)
- requirements.txt (+20 -0)
- resnet1d_best.pth (+3 -0)
- save_models_for_deploy.py (+133 -0)
- tcn_best.pth (+3 -0)
.gitattributes
ADDED
@@ -0,0 +1 @@

*.pth filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,118 @@
---
title: AI Image Detector
emoji: 🔍
colorFrom: blue
colorTo: purple
sdk: gradio
sdk_version: 4.0.0
app_file: app.py
pinned: false
license: mit
---

# 🔍 AI Image Detector

Detect whether an image is **AI-generated** or a **real photograph** using a stacking ensemble of deep learning models.

## 🏗️ Architecture

This detector uses a **two-stage pipeline**:

### Stage 1: Feature Extraction
- **Qwen2.5-VL-3B** vision-language model extracts spatial features from input images
- Features preserve spatial relationships and semantic information

### Stage 2: Classification (Stacking Ensemble)
- **TCN** (Temporal Convolutional Network) - captures sequential patterns in spatial features
- **ResNet-1D** (Deep Residual Network) - learns hierarchical representations
- **Meta-Learner** (Logistic Regression) - combines base model predictions for the final verdict

## 📊 Performance

| Model | Accuracy | F1 Score | AUC-ROC |
|-------|----------|----------|---------|
| TCN | 96.64% | 96.81% | 0.9851 |
| ResNet-1D | 96.76% | 96.90% | 0.9867 |
| **Stacking Ensemble** | **97.18%** | **97.25%** | **0.9892** |

## 🚀 Usage

### Web Interface
Simply upload an image and click "Detect" to get:
- Prediction (AI Generated / Real)
- Confidence score
- Individual model predictions

### API Usage
```python
from gradio_client import Client

client = Client("your-username/ai-image-detector")
result = client.predict(
    image="path/to/your/image.jpg",
    api_name="/detect"
)
print(result)
```

### Local Usage
```python
from app import AIImageDetector

detector = AIImageDetector(models_dir="models")
result = detector.predict("your_image.jpg")

print(f"Prediction: {result['prediction']}")
print(f"Confidence: {result['confidence']:.2%}")
```

## 📁 Model Files

The following files are required in the `models/` directory:

```
models/
├── tcn_best.pth          # Trained TCN model weights
├── resnet1d_best.pth     # Trained ResNet-1D model weights
├── meta_model.pkl        # Trained meta-learner (sklearn)
└── config.pkl            # Model configuration (max_patches, hidden_dim)
```

## 🔧 Training

This model was trained on a dataset of:
- ~10,000 AI-generated images (from various generators)
- ~10,000 real photographs

Training pipeline:
1. Extract spatial features using Qwen2.5-VL
2. Train individual models (TCN, ResNet-1D)
3. Train meta-learner on validation predictions
4. Evaluate on held-out test set

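Step 3, training the meta-learner on validation predictions, is plain stacking: each base model's held-out output becomes part of a small feature vector fed to a logistic regression. A minimal sketch on synthetic data (the toy probabilities and the gradient-descent fit below are illustrative stand-ins for the project's actual validation set and pickled sklearn model):

```python
import numpy as np

# Hypothetical held-out outputs from the two base models: P(real) per
# image, for 8 images with alternating true labels (1 = real, 0 = AI).
rng = np.random.default_rng(0)
y = np.array([1, 0, 1, 0, 1, 0, 1, 0])
tcn_prob = np.clip(y * 0.8 + rng.normal(0.1, 0.05, 8), 0, 1)
resnet_prob = np.clip(y * 0.75 + rng.normal(0.12, 0.05, 8), 0, 1)

# Meta-features mirror app.py: [pred, prob] for each base model.
X = np.column_stack(
    [tcn_prob > 0.5, tcn_prob, resnet_prob > 0.5, resnet_prob]
).astype(float)

# Tiny logistic-regression meta-learner fit by gradient descent
# (a stand-in for sklearn's LogisticRegression).
w, b = np.zeros(4), 0.0
for _ in range(2000):
    p = 1 / (1 + np.exp(-(X @ w + b)))
    w -= 0.5 * (X.T @ (p - y) / len(y))
    b -= 0.5 * np.mean(p - y)

meta_prob = 1 / (1 + np.exp(-(X @ w + b)))
print((meta_prob > 0.5).astype(int))  # recovers y on this separable toy data
```

At inference time the same four-feature vector is built for a single image and passed to `predict_proba`, exactly as `app.py` does below.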
## ⚠️ Limitations

- Best performance on images similar to the training distribution
- May struggle with:
  - Very low resolution images
  - Heavily compressed images
  - Screenshots or digitally altered photos
  - New AI generators not in the training data

## 📝 Citation

If you use this model, please cite:

```bibtex
@misc{ai-image-detector-2024,
  author = {Your Name},
  title = {AI Image Detector: Stacking Ensemble for Detecting AI-Generated Images},
  year = {2024},
  publisher = {Hugging Face},
  url = {https://huggingface.co/spaces/your-username/ai-image-detector}
}
```

## 📄 License

MIT License - See LICENSE file for details.
app.py
ADDED
@@ -0,0 +1,266 @@
```python
#!/usr/bin/env python3
"""
AI Image Detector - API Endpoint for Hugging Face Spaces
Returns JSON with AI probability percentage
"""

import gradio as gr
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
import pickle
import os

# ==================== MODEL DEFINITIONS ====================

class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, dropout=0.3):
        super(TemporalBlock, self).__init__()
        padding = (kernel_size - 1) * dilation
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, stride=stride, padding=padding, dilation=dilation)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.conv1(x)
        out = out[:, :, :-self.conv1.padding[0]] if self.conv1.padding[0] > 0 else out
        out = self.dropout1(self.relu1(self.bn1(out)))
        out = self.conv2(out)
        out = out[:, :, :-self.conv2.padding[0]] if self.conv2.padding[0] > 0 else out
        out = self.dropout2(self.relu2(self.bn2(out)))
        res = x if self.downsample is None else self.downsample(x)
        if res.size(2) != out.size(2):
            diff = res.size(2) - out.size(2)
            res = res[:, :, :-diff] if diff > 0 else nn.functional.pad(res, (0, -diff))
        return self.relu(out + res)


class TCN(nn.Module):
    def __init__(self, input_dim, num_channels=[128, 256, 512, 512], kernel_size=3, dropout=0.3):
        super(TCN, self).__init__()
        layers = []
        for i in range(len(num_channels)):
            dilation = 2 ** i
            in_ch = input_dim if i == 0 else num_channels[i-1]
            layers.append(TemporalBlock(in_ch, num_channels[i], kernel_size, stride=1, dilation=dilation, dropout=dropout))
        self.network = nn.Sequential(*layers)
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool1d(1), nn.Flatten(), nn.Dropout(0.5),
            nn.Linear(num_channels[-1], 256), nn.ReLU(), nn.Dropout(0.3), nn.Linear(256, 2)
        )

    def forward(self, x):
        return self.classifier(self.network(x.transpose(1, 2)))


class ResidualBlock1D(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock1D, self).__init__()
        mid = out_channels // 4
        self.conv1 = nn.Conv1d(in_channels, mid, 1, bias=False)
        self.bn1 = nn.BatchNorm1d(mid)
        self.conv2 = nn.Conv1d(mid, mid, 3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(mid)
        self.conv3 = nn.Conv1d(mid, out_channels, 1, bias=False)
        self.bn3 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(0.3)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(nn.Conv1d(in_channels, out_channels, 1, stride=stride, bias=False), nn.BatchNorm1d(out_channels))

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        return self.dropout(self.relu(out + self.shortcut(x)))


class ResNet1D(nn.Module):
    def __init__(self, input_dim, num_classes=2):
        super(ResNet1D, self).__init__()
        self.conv1 = nn.Conv1d(input_dim, 64, 7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool1d(3, stride=2, padding=1)
        self.layer1 = self._make_layer(64, 256, 2, 1)
        self.layer2 = self._make_layer(256, 512, 2, 2)
        self.layer3 = self._make_layer(512, 1024, 2, 2)
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(0.3), nn.Linear(512, num_classes))

    def _make_layer(self, in_ch, out_ch, blocks, stride):
        layers = [ResidualBlock1D(in_ch, out_ch, stride)]
        for _ in range(1, blocks):
            layers.append(ResidualBlock1D(out_ch, out_ch, 1))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x.transpose(1, 2)))))
        x = self.layer3(self.layer2(self.layer1(x)))
        return self.fc(self.avgpool(x).view(x.size(0), -1))


# ==================== DETECTOR CLASS ====================

class AIDetector:
    def __init__(self):
        self.device = "cpu"  # Force CPU for HF Spaces free tier
        self.hidden_dim = 2048
        self.max_patches = 103

        # Models (lazy loaded)
        self.qwen_model = None
        self.qwen_processor = None
        self.tcn = None
        self.resnet = None
        self.meta_model = None
        self.loaded = False

    def load_qwen(self):
        """Load Qwen2.5-VL for feature extraction"""
        if self.qwen_model is None:
            print("Loading Qwen2.5-VL-3B-Instruct (this takes ~2 minutes on CPU)...")
            from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor

            model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
            self.qwen_processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
            self.qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
                model_id,
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )
            self.qwen_model.eval()
            print("Qwen loaded!")

    def load_classifiers(self):
        """Load trained classifiers"""
        if not self.loaded:
            print("Loading classifiers...")

            # TCN
            self.tcn = TCN(self.hidden_dim).to(self.device)
            self.tcn.load_state_dict(torch.load("tcn_best.pth", map_location=self.device))
            self.tcn.eval()

            # ResNet-1D
            self.resnet = ResNet1D(self.hidden_dim).to(self.device)
            self.resnet.load_state_dict(torch.load("resnet1d_best.pth", map_location=self.device))
            self.resnet.eval()

            # Meta-learner
            with open("meta_model.pkl", "rb") as f:
                self.meta_model = pickle.load(f)

            self.loaded = True
            print("Classifiers loaded!")

    def extract_features(self, image: Image.Image) -> np.ndarray:
        """Extract features from image using Qwen2.5-VL"""
        self.load_qwen()
        from qwen_vl_utils import process_vision_info

        # Resize image
        image = image.convert("RGB").resize((256, 256), Image.LANCZOS)

        messages = [{"role": "user", "content": [{"type": "image", "image": image}, {"type": "text", "text": "Describe"}]}]
        text = self.qwen_processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        image_inputs, _ = process_vision_info(messages)
        inputs = self.qwen_processor(text=[text], images=image_inputs, padding=True, return_tensors="pt")

        with torch.no_grad():
            outputs = self.qwen_model.model(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                pixel_values=inputs.get("pixel_values"),
                image_grid_thw=inputs.get("image_grid_thw"),
                output_hidden_states=True
            )
            features = outputs.last_hidden_state[0].cpu().numpy()

        return features

    def pad_features(self, features: np.ndarray) -> np.ndarray:
        """Pad/truncate features to expected size"""
        n = features.shape[0]
        if n < self.max_patches:
            padded = np.zeros((self.max_patches, self.hidden_dim), dtype=np.float32)
            padded[:n] = features
            return padded
        return features[:self.max_patches].astype(np.float32)

    def predict(self, image: Image.Image) -> dict:
        """Full prediction pipeline"""
        self.load_classifiers()

        # Extract features
        features = self.extract_features(image)
        features = self.pad_features(features)
        x = torch.FloatTensor(features).unsqueeze(0).to(self.device)

        # Get base model predictions
        with torch.no_grad():
            tcn_out = torch.softmax(self.tcn(x), dim=1)
            tcn_prob = tcn_out[0, 1].item()
            tcn_pred = 1 if tcn_prob > 0.5 else 0

            resnet_out = torch.softmax(self.resnet(x), dim=1)
            resnet_prob = resnet_out[0, 1].item()
            resnet_pred = 1 if resnet_prob > 0.5 else 0

        # Meta-learner stacking
        meta_features = np.array([[tcn_pred, tcn_prob, resnet_pred, resnet_prob]])
        final_prob = self.meta_model.predict_proba(meta_features)[0, 1]

        # AI probability is 1 - real probability
        ai_percentage = (1 - final_prob) * 100

        return {
            "ai_percentage": round(ai_percentage, 2),
            "real_percentage": round(final_prob * 100, 2),
            "verdict": "AI Generated" if ai_percentage > 50 else "Real",
            "confidence": round(max(ai_percentage, 100 - ai_percentage), 2),
            "tcn_ai_prob": round((1 - tcn_prob) * 100, 2),
            "resnet_ai_prob": round((1 - resnet_prob) * 100, 2)
        }


# ==================== GRADIO API ====================

detector = AIDetector()

def detect(image):
    """API endpoint function"""
    if image is None:
        return {"error": "No image provided"}

    try:
        result = detector.predict(image)
        return result
    except Exception as e:
        return {"error": str(e)}


# Simple UI for testing + API endpoint
demo = gr.Interface(
    fn=detect,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=gr.JSON(label="Result"),
    title="AI Image Detector API",
    description="Upload an image to detect if it's AI-generated. Returns JSON with ai_percentage.",
    allow_flagging="never"
)

# This exposes the API at /api/predict
if __name__ == "__main__":
    demo.launch()
```
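`TemporalBlock` makes its convolutions causal by over-padding on both sides and then trimming ("chomping") `(kernel_size - 1) * dilation` elements from the right edge, which is what the `out[:, :, :-self.conv1.padding[0]]` slices do. The length arithmetic behind that trick can be sketched in pure Python (an illustration of the formula, not the project's code):

```python
def conv1d_out_len(seq_len, kernel_size, dilation, padding):
    """Output length of a stride-1 1D convolution."""
    return seq_len + 2 * padding - dilation * (kernel_size - 1)

def chomped_len(seq_len, kernel_size, dilation):
    """Pad by (k-1)*d on both sides, then trim (k-1)*d from the right:
    the sequence length is preserved and the receptive field stays causal."""
    padding = (kernel_size - 1) * dilation
    return conv1d_out_len(seq_len, kernel_size, dilation, padding) - padding

# max_patches=103 sequences keep their length at every TCN dilation level.
for d in (1, 2, 4, 8):
    assert chomped_len(103, 3, d) == 103
print("length preserved at every dilation")
```

Because each level doubles the dilation, the stacked blocks cover a receptive field that grows exponentially with depth while every layer emits exactly `max_patches` timesteps.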
app_gradio_ui.py
ADDED
@@ -0,0 +1,528 @@
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
AI Image Detector - Hugging Face Spaces Deployment
|
| 4 |
+
Stacking Ensemble (TCN + ResNet-1D) with Qwen2.5-VL Feature Extraction
|
| 5 |
+
|
| 6 |
+
This app detects whether an image is AI-generated or real using:
|
| 7 |
+
1. Qwen2.5-VL for spatial feature extraction
|
| 8 |
+
2. TCN + ResNet-1D stacking ensemble for classification
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import numpy as np
|
| 15 |
+
from PIL import Image
|
| 16 |
+
import pickle
|
| 17 |
+
import os
|
| 18 |
+
|
| 19 |
+
# ==================== MODEL DEFINITIONS ====================
|
| 20 |
+
|
| 21 |
+
class TemporalBlock(nn.Module):
|
| 22 |
+
"""Temporal Block with Dilated Convolutions"""
|
| 23 |
+
|
| 24 |
+
def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, dropout=0.3):
|
| 25 |
+
super(TemporalBlock, self).__init__()
|
| 26 |
+
|
| 27 |
+
padding = (kernel_size - 1) * dilation
|
| 28 |
+
|
| 29 |
+
self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
|
| 30 |
+
stride=stride, padding=padding, dilation=dilation)
|
| 31 |
+
self.bn1 = nn.BatchNorm1d(out_channels)
|
| 32 |
+
self.relu1 = nn.ReLU()
|
| 33 |
+
self.dropout1 = nn.Dropout(dropout)
|
| 34 |
+
|
| 35 |
+
self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
|
| 36 |
+
stride=stride, padding=padding, dilation=dilation)
|
| 37 |
+
self.bn2 = nn.BatchNorm1d(out_channels)
|
| 38 |
+
self.relu2 = nn.ReLU()
|
| 39 |
+
self.dropout2 = nn.Dropout(dropout)
|
| 40 |
+
|
| 41 |
+
self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
|
| 42 |
+
self.relu = nn.ReLU()
|
| 43 |
+
|
| 44 |
+
def forward(self, x):
|
| 45 |
+
out = self.conv1(x)
|
| 46 |
+
out = out[:, :, :-self.conv1.padding[0]] if self.conv1.padding[0] > 0 else out
|
| 47 |
+
out = self.bn1(out)
|
| 48 |
+
out = self.relu1(out)
|
| 49 |
+
out = self.dropout1(out)
|
| 50 |
+
|
| 51 |
+
out = self.conv2(out)
|
| 52 |
+
out = out[:, :, :-self.conv2.padding[0]] if self.conv2.padding[0] > 0 else out
|
| 53 |
+
out = self.bn2(out)
|
| 54 |
+
out = self.relu2(out)
|
| 55 |
+
out = self.dropout2(out)
|
| 56 |
+
|
| 57 |
+
res = x if self.downsample is None else self.downsample(x)
|
| 58 |
+
|
| 59 |
+
if res.size(2) != out.size(2):
|
| 60 |
+
diff = res.size(2) - out.size(2)
|
| 61 |
+
if diff > 0:
|
| 62 |
+
res = res[:, :, :-diff]
|
| 63 |
+
else:
|
| 64 |
+
res = nn.functional.pad(res, (0, -diff))
|
| 65 |
+
|
| 66 |
+
return self.relu(out + res)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class TCN(nn.Module):
|
| 70 |
+
"""Temporal Convolutional Network"""
|
| 71 |
+
|
| 72 |
+
def __init__(self, input_dim, num_channels=[128, 256, 512, 512], kernel_size=3, dropout=0.3):
|
| 73 |
+
super(TCN, self).__init__()
|
| 74 |
+
|
| 75 |
+
layers = []
|
| 76 |
+
num_levels = len(num_channels)
|
| 77 |
+
|
| 78 |
+
for i in range(num_levels):
|
| 79 |
+
dilation = 2 ** i
|
| 80 |
+
in_channels = input_dim if i == 0 else num_channels[i-1]
|
| 81 |
+
out_channels = num_channels[i]
|
| 82 |
+
|
| 83 |
+
layers.append(
|
| 84 |
+
TemporalBlock(in_channels, out_channels, kernel_size,
|
| 85 |
+
stride=1, dilation=dilation, dropout=dropout)
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
self.network = nn.Sequential(*layers)
|
| 89 |
+
|
| 90 |
+
self.classifier = nn.Sequential(
|
| 91 |
+
nn.AdaptiveAvgPool1d(1),
|
| 92 |
+
nn.Flatten(),
|
| 93 |
+
nn.Dropout(0.5),
|
| 94 |
+
nn.Linear(num_channels[-1], 256),
|
| 95 |
+
nn.ReLU(),
|
| 96 |
+
nn.Dropout(0.3),
|
| 97 |
+
nn.Linear(256, 2)
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
def forward(self, x):
|
| 101 |
+
x = x.transpose(1, 2)
|
| 102 |
+
x = self.network(x)
|
| 103 |
+
x = self.classifier(x)
|
| 104 |
+
return x
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class ResidualBlock1D(nn.Module):
|
| 108 |
+
"""1D Residual Block"""
|
| 109 |
+
|
| 110 |
+
def __init__(self, in_channels, out_channels, stride=1):
|
| 111 |
+
super(ResidualBlock1D, self).__init__()
|
| 112 |
+
|
| 113 |
+
mid_channels = out_channels // 4
|
| 114 |
+
|
| 115 |
+
self.conv1 = nn.Conv1d(in_channels, mid_channels, kernel_size=1, bias=False)
|
| 116 |
+
self.bn1 = nn.BatchNorm1d(mid_channels)
|
| 117 |
+
|
| 118 |
+
self.conv2 = nn.Conv1d(mid_channels, mid_channels, kernel_size=3,
|
| 119 |
+
stride=stride, padding=1, bias=False)
|
| 120 |
+
self.bn2 = nn.BatchNorm1d(mid_channels)
|
| 121 |
+
|
| 122 |
+
self.conv3 = nn.Conv1d(mid_channels, out_channels, kernel_size=1, bias=False)
|
| 123 |
+
self.bn3 = nn.BatchNorm1d(out_channels)
|
| 124 |
+
|
| 125 |
+
self.relu = nn.ReLU(inplace=True)
|
| 126 |
+
self.dropout = nn.Dropout(0.3)
|
| 127 |
+
|
| 128 |
+
self.shortcut = nn.Sequential()
|
| 129 |
+
if stride != 1 or in_channels != out_channels:
|
| 130 |
+
self.shortcut = nn.Sequential(
|
| 131 |
+
nn.Conv1d(in_channels, out_channels, kernel_size=1,
|
| 132 |
+
stride=stride, bias=False),
|
| 133 |
+
nn.BatchNorm1d(out_channels)
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
def forward(self, x):
|
| 137 |
+
residual = x
|
| 138 |
+
|
| 139 |
+
out = self.relu(self.bn1(self.conv1(x)))
|
| 140 |
+
out = self.relu(self.bn2(self.conv2(out)))
|
| 141 |
+
out = self.bn3(self.conv3(out))
|
| 142 |
+
|
| 143 |
+
out += self.shortcut(residual)
|
| 144 |
+
out = self.relu(out)
|
| 145 |
+
out = self.dropout(out)
|
| 146 |
+
|
| 147 |
+
return out
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
class ResNet1D(nn.Module):
|
| 151 |
+
"""ResNet-1D for Sequential Classification"""
|
| 152 |
+
|
| 153 |
+
def __init__(self, input_dim, num_classes=2):
|
| 154 |
+
super(ResNet1D, self).__init__()
|
| 155 |
+
|
| 156 |
+
self.conv1 = nn.Conv1d(input_dim, 64, kernel_size=7, stride=2, padding=3, bias=False)
|
| 157 |
+
self.bn1 = nn.BatchNorm1d(64)
|
| 158 |
+
self.relu = nn.ReLU(inplace=True)
|
| 159 |
+
self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
|
| 160 |
+
|
| 161 |
+
self.layer1 = self._make_layer(64, 256, num_blocks=2, stride=1)
|
| 162 |
+
self.layer2 = self._make_layer(256, 512, num_blocks=2, stride=2)
|
| 163 |
+
self.layer3 = self._make_layer(512, 1024, num_blocks=2, stride=2)
|
| 164 |
+
|
| 165 |
+
self.avgpool = nn.AdaptiveAvgPool1d(1)
|
| 166 |
+
self.fc = nn.Sequential(
|
| 167 |
+
nn.Dropout(0.5),
|
| 168 |
+
nn.Linear(1024, 512),
|
| 169 |
+
nn.ReLU(),
|
| 170 |
+
nn.Dropout(0.3),
|
| 171 |
+
nn.Linear(512, num_classes)
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
def _make_layer(self, in_channels, out_channels, num_blocks, stride):
|
| 175 |
+
layers = []
|
| 176 |
+
layers.append(ResidualBlock1D(in_channels, out_channels, stride))
|
| 177 |
+
for _ in range(1, num_blocks):
|
| 178 |
+
layers.append(ResidualBlock1D(out_channels, out_channels, stride=1))
|
| 179 |
+
return nn.Sequential(*layers)
|
| 180 |
+
|
| 181 |
+
def forward(self, x):
|
| 182 |
+
x = x.transpose(1, 2)
|
| 183 |
+
|
| 184 |
+
x = self.relu(self.bn1(self.conv1(x)))
|
| 185 |
+
x = self.maxpool(x)
|
| 186 |
+
|
| 187 |
+
x = self.layer1(x)
|
| 188 |
+
x = self.layer2(x)
|
| 189 |
+
x = self.layer3(x)
|
| 190 |
+
|
| 191 |
+
x = self.avgpool(x)
|
| 192 |
+
x = x.view(x.size(0), -1)
|
| 193 |
+
x = self.fc(x)
|
| 194 |
+
|
| 195 |
+
return x
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
# ==================== FEATURE EXTRACTOR ====================
|
| 199 |
+
|
| 200 |
+
class FeatureExtractor:
|
| 201 |
+
"""Extract spatial features using Qwen2.5-VL"""
|
| 202 |
+
|
| 203 |
+
def __init__(self, model_id="Qwen/Qwen2.5-VL-3B-Instruct"):
|
| 204 |
+
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 205 |
+
self.model = None
|
| 206 |
+
self.processor = None
|
| 207 |
+
self.model_id = model_id
|
| 208 |
+
self.target_size = (256, 256)
|
| 209 |
+
|
| 210 |
+
def load_model(self):
|
| 211 |
+
"""Load the Qwen2.5-VL model (lazy loading)"""
|
| 212 |
+
if self.model is None:
|
| 213 |
+
        from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor

        print(f"Loading {self.model_id}...")
        self.processor = AutoProcessor.from_pretrained(
            self.model_id,
            trust_remote_code=True
        )

        self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            self.model_id,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto",
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )
        self.model.eval()
        print("Model loaded!")

    def preprocess_image(self, image):
        """Preprocess image with aspect ratio preservation"""
        if isinstance(image, str):
            image = Image.open(image)

        image = image.convert('RGB')

        # Resize with padding to preserve aspect ratio
        width, height = image.size
        scale = min(self.target_size[0] / width, self.target_size[1] / height)

        new_width = int(width * scale)
        new_height = int(height * scale)
        image = image.resize((new_width, new_height), Image.LANCZOS)

        # Create black canvas and paste resized image in center
        canvas = Image.new('RGB', self.target_size, (0, 0, 0))
        paste_x = (self.target_size[0] - new_width) // 2
        paste_y = (self.target_size[1] - new_height) // 2
        canvas.paste(image, (paste_x, paste_y))

        return canvas

    def extract_features(self, image):
        """Extract spatial features from an image"""
        self.load_model()

        from qwen_vl_utils import process_vision_info

        image = self.preprocess_image(image)

        with torch.no_grad():
            messages = [{
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": "Image"}
                ]
            }]

            text = self.processor.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )

            image_inputs, _ = process_vision_info(messages)

            inputs = self.processor(
                text=[text],
                images=image_inputs,
                padding=True,
                return_tensors="pt"
            )

            inputs = {k: v.to(self.device) if isinstance(v, torch.Tensor) else v
                      for k, v in inputs.items()}

            outputs = self.model.model(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                pixel_values=inputs.get('pixel_values'),
                image_grid_thw=inputs.get('image_grid_thw'),
                output_hidden_states=True
            )

            spatial_features = outputs.last_hidden_state[0].cpu().numpy()

        return spatial_features
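The resize-with-padding geometry in `preprocess_image` can be checked in isolation. A minimal sketch, assuming a hypothetical 448x448 `target_size` (the real value is set elsewhere in the class):

```python
# Sketch of the letterbox math from preprocess_image.
# target=(448, 448) is an assumed stand-in for self.target_size.
def letterbox_geometry(width, height, target=(448, 448)):
    # Scale so the image fits entirely inside the target canvas
    scale = min(target[0] / width, target[1] / height)
    new_w, new_h = int(width * scale), int(height * scale)
    # Offsets that center the resized image on the black canvas
    paste_x = (target[0] - new_w) // 2
    paste_y = (target[1] - new_h) // 2
    return new_w, new_h, paste_x, paste_y

print(letterbox_geometry(1920, 1080))  # (448, 252, 0, 98)
```

A 16:9 photo keeps its proportions and is padded vertically, rather than being stretched to fill the square canvas.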

# ==================== DETECTOR ====================

class AIImageDetector:
    """AI Image Detector using Stacking Ensemble"""

    def __init__(self, models_dir="models"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.models_dir = models_dir

        self.feature_extractor = FeatureExtractor()
        self.tcn_model = None
        self.resnet_model = None
        self.meta_model = None

        self.max_patches = None
        self.hidden_dim = None

    def load_models(self):
        """Load all trained models"""
        print("Loading models...")

        # Load config
        config_path = os.path.join(self.models_dir, "config.pkl")
        if os.path.exists(config_path):
            with open(config_path, 'rb') as f:
                config = pickle.load(f)
            self.max_patches = config['max_patches']
            self.hidden_dim = config['hidden_dim']
        else:
            # Default values from training
            self.max_patches = 256
            self.hidden_dim = 2048

        # Load TCN
        tcn_path = os.path.join(self.models_dir, "tcn_best.pth")
        if os.path.exists(tcn_path):
            self.tcn_model = TCN(self.hidden_dim).to(self.device)
            self.tcn_model.load_state_dict(torch.load(tcn_path, map_location=self.device))
            self.tcn_model.eval()
            print("TCN loaded!")

        # Load ResNet-1D
        resnet_path = os.path.join(self.models_dir, "resnet1d_best.pth")
        if os.path.exists(resnet_path):
            self.resnet_model = ResNet1D(self.hidden_dim).to(self.device)
            self.resnet_model.load_state_dict(torch.load(resnet_path, map_location=self.device))
            self.resnet_model.eval()
            print("ResNet-1D loaded!")

        # Load meta-learner
        meta_path = os.path.join(self.models_dir, "meta_model.pkl")
        if os.path.exists(meta_path):
            with open(meta_path, 'rb') as f:
                self.meta_model = pickle.load(f)
            print("Meta-learner loaded!")

        print("All models loaded!")

    def pad_features(self, features):
        """Pad or truncate features to exactly max_patches rows"""
        num_patches = features.shape[0]

        if num_patches < self.max_patches:
            padded = np.zeros((self.max_patches, self.hidden_dim), dtype=np.float32)
            padded[:num_patches, :] = features
            return padded
        else:
            return features[:self.max_patches, :]
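The pad-or-truncate contract of `pad_features` can be verified without torch or NumPy. A pure-Python sketch using nested lists in place of arrays:

```python
# Pure-Python analogue of pad_features: pad short sequences with
# zero rows, truncate long ones, so every output has max_patches rows.
def pad_rows(rows, max_patches, hidden_dim):
    if len(rows) < max_patches:
        return rows + [[0.0] * hidden_dim for _ in range(max_patches - len(rows))]
    return rows[:max_patches]

short = [[1.0, 2.0]] * 3     # 3 patches, hidden_dim = 2
long = [[1.0, 2.0]] * 10     # 10 patches

assert len(pad_rows(short, 5, 2)) == 5          # zero-padded up
assert pad_rows(short, 5, 2)[-1] == [0.0, 0.0]  # padding rows are zeros
assert len(pad_rows(long, 5, 2)) == 5           # truncated down
```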
    def predict(self, image):
        """
        Predict whether an image is AI-generated or real

        Returns:
            dict: {
                'prediction': 'AI Generated' or 'Real',
                'confidence': float,
                'tcn_prob': float,
                'resnet_prob': float,
                'details': str
            }
        """
        # Ensure models are loaded
        if self.tcn_model is None:
            self.load_models()

        # Extract features
        features = self.feature_extractor.extract_features(image)

        # Pad features
        padded_features = self.pad_features(features)

        # Convert to tensor
        x = torch.FloatTensor(padded_features).unsqueeze(0).to(self.device)

        # Get predictions from base models
        with torch.no_grad():
            tcn_output = self.tcn_model(x)
            tcn_probs = torch.softmax(tcn_output, dim=1)
            tcn_prob = tcn_probs[0, 1].cpu().item()  # Probability of Real
            tcn_pred = 1 if tcn_prob > 0.5 else 0

            resnet_output = self.resnet_model(x)
            resnet_probs = torch.softmax(resnet_output, dim=1)
            resnet_prob = resnet_probs[0, 1].cpu().item()  # Probability of Real
            resnet_pred = 1 if resnet_prob > 0.5 else 0

        # Stack for meta-learner
        if self.meta_model is not None:
            meta_features = np.array([[tcn_pred, tcn_prob, resnet_pred, resnet_prob]])
            final_pred = self.meta_model.predict(meta_features)[0]
            final_prob = self.meta_model.predict_proba(meta_features)[0, 1]
        else:
            # Simple averaging fallback
            final_prob = (tcn_prob + resnet_prob) / 2
            final_pred = 1 if final_prob > 0.5 else 0

        # Determine prediction
        prediction = "Real" if final_pred == 1 else "AI Generated"
        confidence = final_prob if final_pred == 1 else (1 - final_prob)

        return {
            'prediction': prediction,
            'confidence': confidence,
            'tcn_prob': tcn_prob,
            'resnet_prob': resnet_prob,
            'details': f"TCN: {tcn_prob:.2%} Real | ResNet-1D: {resnet_prob:.2%} Real | Ensemble: {final_prob:.2%} Real"
        }
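The averaging fallback in `predict` (used when no meta-learner is on disk) can be sketched on its own. The probabilities below are illustrative values, not real model outputs:

```python
# Fallback ensemble from predict(): average the two base models'
# P(Real), threshold at 0.5, and report confidence for the chosen class.
def ensemble_fallback(tcn_prob, resnet_prob, threshold=0.5):
    final_prob = (tcn_prob + resnet_prob) / 2
    final_pred = 1 if final_prob > threshold else 0
    prediction = "Real" if final_pred == 1 else "AI Generated"
    confidence = final_prob if final_pred == 1 else 1 - final_prob
    return prediction, confidence

print(ensemble_fallback(1.0, 0.5))    # ('Real', 0.75)
print(ensemble_fallback(0.25, 0.25))  # ('AI Generated', 0.75)
```

Note that `confidence` always refers to the class that was chosen, so a strong "AI Generated" verdict also reads as a high number.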

# ==================== GRADIO INTERFACE ====================

# Initialize detector (will load models on first prediction)
detector = AIImageDetector(models_dir="models")

def detect_image(image):
    """Gradio interface function"""
    if image is None:
        return "Please upload an image", "", ""

    try:
        result = detector.predict(image)

        # Format output
        if result['prediction'] == "AI Generated":
            label = f"🤖 AI Generated ({result['confidence']:.1%} confidence)"
            color = "red"
        else:
            label = f"📷 Real Image ({result['confidence']:.1%} confidence)"
            color = "green"

        details = result['details']

        # Create confidence display
        ai_conf = 1 - result['confidence'] if result['prediction'] == "Real" else result['confidence']
        real_conf = result['confidence'] if result['prediction'] == "Real" else 1 - result['confidence']

        confidence_display = f"""
### Model Predictions:
- **TCN Model**: {result['tcn_prob']:.1%} Real / {1 - result['tcn_prob']:.1%} AI
- **ResNet-1D Model**: {result['resnet_prob']:.1%} Real / {1 - result['resnet_prob']:.1%} AI

### Final Ensemble Verdict:
- **AI Generated**: {ai_conf:.1%}
- **Real Image**: {real_conf:.1%}
"""

        return label, confidence_display, details

    except Exception as e:
        return f"Error: {str(e)}", "", ""


# Create Gradio interface
with gr.Blocks(title="AI Image Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🔍 AI Image Detector

**Detect whether an image is AI-generated or a real photograph**

This detector uses a stacking ensemble of:
- **TCN** (Temporal Convolutional Network)
- **ResNet-1D** (Deep Residual Network)

Features are extracted using the **Qwen2.5-VL** vision-language model.

---
""")

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Image")
            detect_btn = gr.Button("🔍 Detect", variant="primary")

        with gr.Column(scale=1):
            prediction_output = gr.Textbox(label="Prediction", lines=1)
            confidence_output = gr.Markdown(label="Confidence Details")
            details_output = gr.Textbox(label="Raw Details", lines=2)

    # Examples
    gr.Markdown("### Try these examples:")
    gr.Examples(
        examples=[
            # Add example images here
        ],
        inputs=image_input
    )

    # Connect button
    detect_btn.click(
        fn=detect_image,
        inputs=[image_input],
        outputs=[prediction_output, confidence_output, details_output]
    )

    gr.Markdown("""
---
### ℹ️ About

This model was trained on a dataset of AI-generated and real images.

**Accuracy**: ~97%+ on test set

**Note**: Results are probabilistic. Always verify important decisions.
""")


if __name__ == "__main__":
    demo.launch()
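`detect_image` derives both displayed percentages from the single ensemble confidence value. A small standalone sketch of that bookkeeping:

```python
# Same logic as detect_image: split one chosen-class confidence into
# complementary "AI Generated" / "Real" percentages for display.
def two_sided(prediction, confidence):
    ai_conf = 1 - confidence if prediction == "Real" else confidence
    real_conf = confidence if prediction == "Real" else 1 - confidence
    return ai_conf, real_conf

assert two_sided("Real", 0.75) == (0.25, 0.75)
assert two_sided("AI Generated", 0.75) == (0.75, 0.25)
```

The two numbers always sum to 1, so the markdown panel never shows contradictory percentages.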
meta_model.pkl
ADDED
Binary file (742 Bytes).
requirements.txt
ADDED
# AI Image Detector - Hugging Face Spaces Requirements

# Core ML frameworks
torch>=2.0.0
transformers>=4.37.0
accelerate>=0.25.0

# Qwen2.5-VL specific
qwen-vl-utils==0.0.8

# ML utilities
numpy>=1.24.0
scikit-learn>=1.3.0
pillow>=10.0.0

# Web interface
gradio>=4.0.0

# Optional: for faster inference
# bitsandbytes>=0.41.0  # 8-bit quantization
resnet1d_best.pth
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:bc6e47b7f2c97b8ec1a10c9c0dbb6e19afe59200000512e39fd9d51bd11470d8
size 15486826
save_models_for_deploy.py
ADDED
#!/usr/bin/env python3
"""
Save Models for Hugging Face Deployment

Run this cell AFTER training TCN, ResNet-1D, and the stacking ensemble.
This will save all necessary files for deployment.

Required variables in memory:
- tcn_results (with 'model' key or separate tcn model)
- resnet1d_results (with 'model' key or separate resnet model)
- meta_model (trained sklearn meta-learner)
- data (with 'sequential' containing max_patches and hidden_dim)
"""

import torch
import pickle
import os
import shutil

# ==================== CONFIGURATION ====================

OUTPUT_DIR = '/kaggle/working/deploy_models'
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("="*80)
print("SAVING MODELS FOR HUGGING FACE DEPLOYMENT")
print("="*80)

# ==================== SAVE CONFIG ====================

print("\nSaving configuration...")

config = {
    'max_patches': data['sequential']['max_patches'] if 'max_patches' in data['sequential'] else 256,
    'hidden_dim': data['sequential']['hidden_dim'] if 'hidden_dim' in data['sequential'] else 2048,
}

config_path = os.path.join(OUTPUT_DIR, 'config.pkl')
with open(config_path, 'wb') as f:
    pickle.dump(config, f)
print(f"✅ Config saved to {config_path}")
print(f"   • max_patches: {config['max_patches']}")
print(f"   • hidden_dim: {config['hidden_dim']}")

# ==================== SAVE TCN MODEL ====================

print("\nSaving TCN model...")

# Check if TCN model state dict already exists
if os.path.exists('tcn_best.pth'):
    shutil.copy('tcn_best.pth', os.path.join(OUTPUT_DIR, 'tcn_best.pth'))
    print("✅ TCN model copied from tcn_best.pth")
else:
    print("⚠️ tcn_best.pth not found. Please save your TCN model:")
    print("   torch.save(tcn_model.state_dict(), 'tcn_best.pth')")

# ==================== SAVE RESNET-1D MODEL ====================

print("\nSaving ResNet-1D model...")

# Check if ResNet model state dict already exists
if os.path.exists('resnet1d_best.pth'):
    shutil.copy('resnet1d_best.pth', os.path.join(OUTPUT_DIR, 'resnet1d_best.pth'))
    print("✅ ResNet-1D model copied from resnet1d_best.pth")
else:
    print("⚠️ resnet1d_best.pth not found. Please save your ResNet model:")
    print("   torch.save(resnet_model.state_dict(), 'resnet1d_best.pth')")

# ==================== SAVE META-LEARNER ====================

print("\nSaving meta-learner...")

try:
    meta_path = os.path.join(OUTPUT_DIR, 'meta_model.pkl')
    with open(meta_path, 'wb') as f:
        pickle.dump(meta_model, f)
    print(f"✅ Meta-learner saved to {meta_path}")
except NameError:
    print("⚠️ meta_model not found. Please ensure the stacking ensemble has been trained.")

# ==================== CREATE ZIP ====================

print("\nCreating deployment package...")

import zipfile

zip_path = '/kaggle/working/huggingface_deploy.zip'
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(OUTPUT_DIR):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, OUTPUT_DIR)
            zipf.write(file_path, arcname)

zip_size_mb = os.path.getsize(zip_path) / (1024**2)
print(f"✅ Deployment package created: {zip_path}")
print(f"   Size: {zip_size_mb:.2f} MB")

# ==================== SUMMARY ====================

print("\n" + "="*80)
print("DEPLOYMENT CHECKLIST")
print("="*80)

print("\n✅ Files saved:")
for f in os.listdir(OUTPUT_DIR):
    size_kb = os.path.getsize(os.path.join(OUTPUT_DIR, f)) / 1024
    print(f"   • {f} ({size_kb:.1f} KB)")

print("\nNext Steps:")
print("1. Download huggingface_deploy.zip from Kaggle")
print("2. Create a new Hugging Face Space:")
print("   huggingface-cli repo create your-username/ai-image-detector --type space --space_sdk gradio")
print("3. Clone and add files:")
print("   git clone https://huggingface.co/spaces/your-username/ai-image-detector")
print("   cd ai-image-detector")
print("   # Extract model files to models/ folder")
print("   # Copy app.py, requirements.txt, README.md")
print("4. Push to Hugging Face:")
print("   git add .")
print("   git commit -m 'Add AI image detector'")
print("   git push")

print("\nAlternative: upload directly via the Hugging Face web interface")
print("   Go to huggingface.co/new-space and upload files")

print("="*80)
print("SAVE COMPLETE!")
print("="*80)

# Display download link
from IPython.display import FileLink
FileLink(zip_path)
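The config.pkl written above is just a small dict. A round-trip sketch of that format, with stand-in values (the real `max_patches`/`hidden_dim` come from the training data):

```python
import io
import pickle

# Round-trip the config.pkl format used by the deploy script,
# serializing to an in-memory buffer instead of a file.
config = {"max_patches": 256, "hidden_dim": 2048}

buf = io.BytesIO()
pickle.dump(config, buf)
buf.seek(0)
restored = pickle.load(buf)

assert restored == config
print(restored["max_patches"], restored["hidden_dim"])  # 256 2048
```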
tcn_best.pth
ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:5f99f0185ffefb8de5eda586f3ceb99f4fb5d46c13ab74a61862e3dc92f2582a
size 17845974
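The `.pth` entries above are Git LFS pointer files, not the weights themselves; the actual tensors are fetched on checkout. A minimal parser for the three-line pointer format shown here:

```python
# Parse a git-lfs pointer file (version / oid / size key-value lines).
def parse_lfs_pointer(text):
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "oid": fields["oid"].split(":", 1)[1],  # strip the "sha256:" prefix
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:5f99f0185ffefb8de5eda586f3ceb99f4fb5d46c13ab74a61862e3dc92f2582a
size 17845974
"""
info = parse_lfs_pointer(pointer)
print(f"{info['size_bytes'] / 1e6:.1f} MB")  # 17.8 MB
```

This is why the repo's .gitattributes routes `*.pth` through the LFS filter: only these small pointers live in git history.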