Shreesha1 commited on
Commit
b5fabfd
·
verified ·
1 Parent(s): 29c2819
Files changed (6) hide show
  1. Dockerfile +24 -0
  2. app.py +488 -0
  3. dataset.py +230 -0
  4. model.py +110 -0
  5. requirements.txt +21 -0
  6. slop_detector.py +228 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Debian-based Python image keeps the final container small.
FROM python:3.11-slim-bookworm

# System deps for OpenCV, etc. (libgl1 / libglib2.0-0 are runtime libraries
# cv2 links against on Debian).
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first so Docker layer caching survives code-only edits.
COPY requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy code + models
COPY . .

# Cloud Run will set PORT env; default to 8080 if not set
ENV PORT=8080
EXPOSE 8080

# Use sh -c so ${PORT} is expanded by the shell
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT:-8080}"]
app.py ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Form
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from typing import Optional
5
+ from contextlib import asynccontextmanager
6
+ import torch
7
+ import os
8
+ import shutil
9
+ import tempfile
10
+ import torch.nn.functional as F
11
+ from pathlib import Path
12
+
13
+ from model import DeepfakeDetector, FeatureExtractor
14
+ from dataset import extract_frames_from_video, process_image
15
+ from slop_detector import SlopDetector, detect_ai_text, analyze_text_content
16
+
17
+ BASE_DIR = Path(__file__).resolve().parent
18
+ SEQUENCE_LENGTH = 10
19
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
+
21
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: eagerly warm both detectors at startup.

    A load failure here is logged but never fatal — the endpoints fall
    back to lazy loading on first request.
    """
    print("Startup: Pre-loading default models to avoid delay...")
    try:
        load_model_if_needed()           # video deepfake detector
        load_slop_detector_if_needed()   # AI-text detector
        print("Startup: All models loaded and ready!")
    except Exception as e:
        print(f"Startup Warning: Could not pre-load models: {e}")

    yield

    # --- Shutdown (Cleanup if needed) ---
    print("Shutdown: Cleaning up...")
40
app = FastAPI(lifespan=lifespan)

# Exact origins allowed to call this API from a browser. Note: no trailing
# slash — browsers send the Origin header without one and CORS matching is
# exact-string, so "https://x.app/" would never match.
allowed_origins = [
    "http://localhost:5173",  # local vite
    "http://localhost:8080",  # if you're using that
    "https://deepfake-detection-lime.vercel.app",  # deployed frontend
]
# Bug fix: the original passed allow_origins=["*"] and never used the
# allowed_origins list above. A "*" wildcard together with
# allow_credentials=True is forbidden by the CORS spec (browsers reject
# the response), so the explicit allow-list is used instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
54
+
55
+ # --- Model Paths ---
56
+ SAVED_MODEL_PATH = BASE_DIR / "saved_models" / "deepfake_detector_best.pth"
57
+
58
+ model = None
59
+ feature_dim = None
60
+ model_error: str | None = None
61
+
62
+ # Slop detector for AI text detection
63
+ slop_detector = None
64
+ slop_detector_error: str | None = None
65
+
66
+
67
# Pydantic models for request/response
class TextAnalysisRequest(BaseModel):
    # Raw text to analyze for AI generation.
    text: str


class TextAnalysisResponse(BaseModel):
    # NOTE(review): declared but not currently attached as a response_model
    # to any endpoint — handlers return plain dicts. Kept as documentation
    # of the /analyze-text response shape.
    status: str
    label: str
    confidence: float
    is_ai_generated: bool
    details: Optional[dict] = None
78
+
79
+
80
def load_model_if_needed():
    """Load the video deepfake model once, on first use.

    On success sets module globals ``model`` and ``feature_dim``; on
    failure records the message in ``model_error`` (surfaced by /health)
    and leaves ``model`` as None. Safe to call repeatedly.
    """
    global model, feature_dim, model_error

    if model is not None:
        return  # already loaded

    print("Loading deepfake model lazily on first request...")
    try:
        # Instantiate the CNN once just to discover its output width, then
        # discard it (DeepfakeDetector builds its own extractor internally).
        temp_cnn = FeatureExtractor(freeze=True)
        feature_dim_local = temp_cnn.feature_dim
        del temp_cnn

        m = DeepfakeDetector(
            cnn_feature_dim=feature_dim_local,
            lstm_hidden_size=512,
            lstm_layers=2,
        ).to(DEVICE)

        # SAVED_MODEL_PATH is a pathlib.Path; use its own exists().
        if not SAVED_MODEL_PATH.exists():
            err = f"Model file not found at: {SAVED_MODEL_PATH}"
            print("Error:", err)
            model_error = err
            return

        state = torch.load(SAVED_MODEL_PATH, map_location=DEVICE)
        m.load_state_dict(state)
        m.eval()  # inference mode: disables dropout

        # The `global` declaration above makes plain assignment sufficient;
        # the original's extra globals()[...] writes were redundant.
        model_error = None
        model = m
        feature_dim = feature_dim_local

        print("Model loaded successfully!")
    except Exception as e:
        model_error = str(e)
        print(f"Error loading model: {e}")
117
+
118
+
119
def load_slop_detector_if_needed():
    """Load the AI-text ("slop") detector once, on first use.

    Mirrors load_model_if_needed(): success populates ``slop_detector``,
    failure records the message in ``slop_detector_error``.
    """
    global slop_detector, slop_detector_error

    if slop_detector is not None:
        return  # already loaded

    print("Loading slop detector for AI text detection...")
    try:
        detector = SlopDetector(device=str(DEVICE))
        detector.load_model()

        # The `global` declaration makes plain assignment sufficient; the
        # original's globals()[...] write was redundant.
        slop_detector_error = None
        slop_detector = detector

        print("Slop detector loaded successfully!")
    except Exception as e:
        slop_detector_error = str(e)
        print(f"Error loading slop detector: {e}")
137
+
138
+
139
@app.get("/")
def root():
    """Liveness banner for the API root."""
    return {"message": "Deepfake detector backend running"}
142
+
143
+
144
@app.get("/health")
def health():
    """Report the load state of both models plus an overall rollup."""

    def _state(err, instance):
        # Per-model status: a recorded error wins over "not loaded yet".
        if err is not None:
            return {"status": "error", "detail": err}
        if instance is None:
            return {"status": "not_loaded_yet"}
        return {"status": "ok"}

    status_info = {
        "deepfake_model": _state(model_error, model),
        "slop_detector": _state(slop_detector_error, slop_detector),
    }

    if model_error or slop_detector_error:
        overall_status = "partial_error"
    elif model is None and slop_detector is None:
        overall_status = "models_not_loaded_yet"
    else:
        overall_status = "ok"

    return {"status": overall_status, "models": status_info}
171
+
172
+
173
@app.post("/predict")
async def predict_video(file: UploadFile = File(...)):
    """Classify an uploaded video as REAL or FAKE.

    The upload is spooled to a temp file (OpenCV needs a real path),
    faces are sampled into a fixed-length frame sequence, and the
    CNN+LSTM model scores the sequence.
    """
    # Lazy load model on first request
    load_model_if_needed()

    if model is None:
        # loading failed
        raise HTTPException(
            status_code=503,
            detail=f"Model not available on server. Error: {model_error}",
        )

    # file.filename may be None for some clients; treat that as invalid
    # instead of raising AttributeError.
    if not (file.filename or "").lower().endswith((".mp4", ".mov", ".avi")):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload .mp4, .mov, or .avi",
        )

    # Save uploaded file to temp path
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        shutil.copyfileobj(file.file, temp_file)
        temp_file_path = temp_file.name

    try:
        frames_tensor = extract_frames_from_video(
            video_path=temp_file_path,
            sequence_length=SEQUENCE_LENGTH,
        )

        if frames_tensor is None:
            return {
                "status": "error",
                "message": "Could not detect a face in the video.",
            }

        # Add the batch dimension: [T, C, H, W] -> [1, T, C, H, W]
        frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            output = model(frames_tensor)
            probabilities = F.softmax(output, dim=1)
            confidence, predicted_class = torch.max(probabilities, 1)

        prediction_idx = predicted_class.item()
        conf_score = confidence.item() * 100
        result_label = "FAKE" if prediction_idx == 1 else "REAL"

        return {
            "status": "success",
            "filename": file.filename,
            "prediction": result_label,
            "confidence": round(conf_score, 2),
            "is_fake": prediction_idx == 1,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Guard the removal (consistent with /analyze-image) so cleanup can
        # never mask the real error with a FileNotFoundError.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
230
+
231
+
232
@app.post("/analyze-image")
async def analyze_image(file: UploadFile = File(...)):
    """Classify a single uploaded image as REAL or FAKE.

    The image is expanded into a pseudo-video (the same face crop
    repeated SEQUENCE_LENGTH times) so the sequence model can score it.
    """
    load_model_if_needed()

    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"Model not available on server. Error: {model_error}",
        )

    allowed_exts = (".jpg", ".jpeg", ".png", ".webp")
    if not file.filename.lower().endswith(allowed_exts):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload .jpg, .jpeg, .png, or .webp",
        )

    # Spool the upload to disk so OpenCV can read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
        shutil.copyfileobj(file.file, tmp)
        temp_file_path = tmp.name

    try:
        # process_image returns [SEQUENCE_LENGTH, 3, 224, 224] — the image
        # repeated into a static "video" — or None when no face is found.
        frames_tensor = process_image(
            image_path=temp_file_path,
            sequence_length=SEQUENCE_LENGTH,
        )

        if frames_tensor is None:
            return {
                "status": "error",
                "message": "Could not detect a face in the image.",
            }

        batch = frames_tensor.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            logits = model(batch)
            probs = F.softmax(logits, dim=1)
            confidence, predicted_class = torch.max(probs, 1)

        idx = predicted_class.item()
        score = confidence.item() * 100

        return {
            "status": "success",
            "filename": file.filename,
            "prediction": "FAKE" if idx == 1 else "REAL",
            "confidence": round(score, 2),
            "is_fake": idx == 1,
            "type": "image_analysis"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
293
+
294
+
295
@app.post("/analyze-text")
async def analyze_text(request: TextAnalysisRequest):
    """Score a block of text as HUMAN- or AI-written."""
    load_slop_detector_if_needed()

    if slop_detector is None:
        raise HTTPException(
            status_code=503,
            detail=f"Slop detector not available. Error: {slop_detector_error}",
        )

    try:
        verdict = slop_detector.detect(request.text)
        return {
            "status": "success",
            "label": verdict.label,
            "confidence": round(verdict.confidence, 2),
            "is_ai_generated": verdict.is_ai_generated,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
316
+
317
+
318
@app.post("/analyze-text-detailed")
async def analyze_text_detailed(request: TextAnalysisRequest):
    """Per-paragraph AI-text breakdown for a block of text."""
    load_slop_detector_if_needed()

    if slop_detector is None:
        raise HTTPException(
            status_code=503,
            detail=f"Slop detector not available. Error: {slop_detector_error}",
        )

    try:
        report = slop_detector.analyze_paragraphs(request.text)
        # Flatten the analysis dict into the response alongside "status".
        return {"status": "success", **report}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
337
+
338
+
339
@app.post("/predict-combined")
async def predict_combined(
    file: UploadFile = File(...),
    context_text: Optional[str] = Form(None),
):
    """Multi-modal check: deepfake-score the video and, when context text
    is supplied, AI-score the text too, then fuse both into one verdict.

    Returns per-modality results plus the combined verdict produced by
    determine_combined_verdict(). A failed video leg (no face found) does
    not abort the request — the text leg and combined verdict still run.
    """
    # Load both models
    load_model_if_needed()

    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"Deepfake model not available. Error: {model_error}",
        )

    if not file.filename.lower().endswith((".mp4", ".mov", ".avi")):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload .mp4, .mov, or .avi",
        )

    # Save uploaded file to temp path (OpenCV needs a real file on disk).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        shutil.copyfileobj(file.file, temp_file)
        temp_file_path = temp_file.name

    try:
        # --- Video Deepfake Detection ---
        frames_tensor = extract_frames_from_video(
            video_path=temp_file_path,
            sequence_length=SEQUENCE_LENGTH,
        )

        if frames_tensor is None:
            # No face found: record the video leg as failed but continue.
            video_result = {
                "status": "error",
                "message": "Could not detect a face in the video.",
                "prediction": None,
                "confidence": None,
                "is_fake": None,
            }
        else:
            # Add batch dim: [T, C, H, W] -> [1, T, C, H, W]
            frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                output = model(frames_tensor)
                probabilities = F.softmax(output, dim=1)
                confidence, predicted_class = torch.max(probabilities, 1)

            prediction_idx = predicted_class.item()
            conf_score = confidence.item() * 100
            result_label = "FAKE" if prediction_idx == 1 else "REAL"

            video_result = {
                "status": "success",
                "prediction": result_label,
                "confidence": round(conf_score, 2),
                "is_fake": prediction_idx == 1,
            }

        # --- Text Context Analysis (if provided) ---
        text_result = None
        if context_text and context_text.strip():
            load_slop_detector_if_needed()

            if slop_detector is not None:
                text_analysis = slop_detector.analyze_paragraphs(context_text)
                text_result = {
                    "status": "success",
                    "overall_label": text_analysis["overall_label"],
                    "overall_confidence": text_analysis["overall_confidence"],
                    "ai_probability": text_analysis["ai_probability"],
                    "paragraph_count": text_analysis["paragraph_count"],
                    "ai_paragraph_count": text_analysis["ai_paragraph_count"],
                }
            else:
                # Text leg degrades gracefully if the detector failed to load.
                text_result = {
                    "status": "error",
                    "message": f"Slop detector not available: {slop_detector_error}"
                }

        # --- Combined Assessment ---
        combined_verdict = determine_combined_verdict(video_result, text_result)

        return {
            "status": "success",
            "filename": file.filename,
            "video_analysis": video_result,
            "text_analysis": text_result,
            "combined_verdict": combined_verdict,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
435
+
436
+
437
def determine_combined_verdict(video_result: dict, text_result: Optional[dict]) -> dict:
    """Fuse the video and text analyses into one verdict dict.

    Returns {"verdict", "severity", "explanation"}. ``text_result`` may be
    None (no context supplied) or an error dict; both count as "no usable
    text signal".
    """
    video_status = video_result.get("status")
    video_fake = video_result.get("is_fake")
    video_confidence = video_result.get("confidence", 0)

    # Extract the text signal only when that leg succeeded.
    has_text = text_result is not None and text_result.get("status") == "success"
    text_ai = (text_result.get("overall_label") == "AI") if has_text else None
    text_confidence = text_result.get("overall_confidence", 0) if has_text else None

    if video_status == "error":
        tail = ""
        if text_ai is not None:
            tail = f"Text appears {'AI-generated' if text_ai else 'human-written'}."
        return {
            "verdict": "INCONCLUSIVE",
            "severity": "unknown",
            "explanation": "Could not analyze video (no face detected). " + tail,
        }

    if video_fake:
        if text_ai:
            return {
                "verdict": "HIGH_RISK_DEEPFAKE",
                "severity": "high",
                "explanation": f"Video detected as FAKE ({video_confidence:.1f}% confidence) AND associated text appears AI-generated ({text_confidence:.1f}% confidence). This combination suggests sophisticated manipulation."
            }
        if text_ai is False:
            return {
                "verdict": "DEEPFAKE_DETECTED",
                "severity": "high",
                "explanation": f"Video detected as FAKE ({video_confidence:.1f}% confidence). Associated text appears human-written."
            }
        # text_ai is None: no text context was supplied.
        return {
            "verdict": "DEEPFAKE_DETECTED",
            "severity": "high",
            "explanation": f"Video detected as FAKE ({video_confidence:.1f}% confidence). No text context provided for additional analysis."
        }

    if text_ai:
        return {
            "verdict": "SUSPICIOUS_CONTEXT",
            "severity": "medium",
            "explanation": f"Video appears REAL ({video_confidence:.1f}% confidence), but associated text appears AI-generated ({text_confidence:.1f}% confidence). Context may be misleading."
        }

    tail = ""
    if text_ai is False:
        tail = f" Associated text appears human-written ({text_confidence:.1f}% confidence)."
    return {
        "verdict": "LIKELY_AUTHENTIC",
        "severity": "low",
        "explanation": f"Video appears REAL ({video_confidence:.1f}% confidence)." + tail,
    }
dataset.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ from torch.utils.data import Dataset
6
+ from torchvision import transforms
7
+ # Import Facenet-PyTorch for Face Detection (No TensorFlow needed)
8
+ from facenet_pytorch import MTCNN
9
+
10
# --- 1. CONFIGURATION ---
# 10 frames is enough for a resume project and runs faster on CPU
SEQUENCE_LENGTH_DEFAULT = 10
IMG_SIZE = 224
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- 2. INITIALIZE MTCNN ---
print(f"Initializing MTCNN on {DEVICE}...")
# keep_all=True makes detect() return every candidate face; the callers
# below sort the candidates by confidence and keep the best one themselves.
mtcnn_detector = MTCNN(keep_all=True, device=DEVICE)

# Standard ImageNet normalization, matching the pretrained ResNeXt backbone.
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
30
+
31
# --- 3. PREPROCESSING FUNCTION ---
def extract_frames_from_video(video_path, sequence_length=SEQUENCE_LENGTH_DEFAULT):
    """Sample `sequence_length` frames evenly from a video, crop the most
    confident face from each, and return them stacked as [T, 3, H, W].

    Returns None when the video cannot be read or no face is ever found.
    Frames whose detection fails are skipped; the last good crop is
    repeated to pad the sequence back to `sequence_length`.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None

    processed_frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # Bug fix: the original returned here without cap.release(),
            # leaking the underlying video handle. The finally below now
            # guarantees release on every path.
            return None

        # Evenly spaced frame indices across the whole clip.
        frame_indices = np.linspace(0, total_frames - 1, sequence_length, dtype=int)

        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue

            # Convert to RGB for MTCNN (OpenCV is BGR)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            try:
                # boxes given as [x1, y1, x2, y2]
                boxes, probs = mtcnn_detector.detect(frame_rgb)

                if boxes is None or len(boxes) == 0:
                    continue

                # Keep only detections with a real confidence score.
                face_list = [
                    {'box': box, 'conf': prob}
                    for box, prob in zip(boxes, probs)
                    if prob is not None
                ]
                if not face_list:
                    continue

                best_face = max(face_list, key=lambda f: f['conf'])
                x1, y1, x2, y2 = best_face['box']

                # Clamp to the image and convert floats to ints.
                x, y = max(0, int(x1)), max(0, int(y1))
                w, h = int(x2 - x1), int(y2 - y1)

                # Add padding (10%) around the detected box.
                pad_w = int(w * 0.1)
                pad_h = int(h * 0.1)

                img_h, img_w, _ = frame.shape
                y_min = max(0, y - pad_h)
                y_max = min(img_h, y + h + pad_h)
                x_min = max(0, x - pad_w)
                x_max = min(img_w, x + w + pad_w)

                # NOTE: the crop is taken from the original BGR frame,
                # matching the existing pipeline's behavior.
                face_crop = frame[y_min:y_max, x_min:x_max]

                if face_crop.size != 0:
                    processed_frames.append(data_transforms(face_crop))
            except Exception:
                # Skip frames whose detection/cropping fails.
                continue
    finally:
        cap.release()

    if not processed_frames:
        return None

    # Padding if we missed some frames due to detection failure
    while len(processed_frames) < sequence_length:
        processed_frames.append(processed_frames[-1])

    return torch.stack(processed_frames[:sequence_length])
111
+
112
+
113
# --- 3b. IMAGE PROCESSING FUNCTION ---
def process_image(image_path, sequence_length=SEQUENCE_LENGTH_DEFAULT):
    """Crop the best face from a still image and tile it into a pseudo
    video of `sequence_length` identical frames: [T, 3, 224, 224].

    Returns None when the file is unreadable or no face is detected.
    """
    try:
        frame = cv2.imread(image_path)
        if frame is None:
            return None

        # MTCNN expects RGB; OpenCV loads BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        boxes, probs = mtcnn_detector.detect(rgb)
        if boxes is None or len(boxes) == 0:
            return None

        # Keep only detections that carry a confidence score.
        candidates = []
        for box, prob in zip(boxes, probs):
            if prob is not None:
                candidates.append({'box': box, 'conf': prob})
        if not candidates:
            return None

        best = sorted(candidates, key=lambda c: c['conf'], reverse=True)[0]
        x1, y1, x2, y2 = best['box']

        w = x2 - x1
        h = y2 - y1
        x = x1
        y = y1

        # Integer conversion and clamping to the image.
        x, y = max(0, int(x)), max(0, int(y))
        w, h = int(w), int(h)

        # 10% padding around the detected box.
        pad_w = int(w * 0.1)
        pad_h = int(h * 0.1)

        img_h, img_w, _ = frame.shape
        y_lo = max(0, y - pad_h)
        y_hi = min(img_h, y + h + pad_h)
        x_lo = max(0, x - pad_w)
        x_hi = min(img_w, x + w + pad_w)

        crop = frame[y_lo:y_hi, x_lo:x_hi]
        if crop.size == 0:
            return None

        single = data_transforms(crop)  # [3, 224, 224]

        # Repeat the single frame to fake a sequence.
        return single.unsqueeze(0).repeat(sequence_length, 1, 1, 1)

    except Exception as e:
        print(f"Error processing image: {e}")
        return None
170
+
171
+
172
# --- 4. DATASET CLASS ---
class DeepfakeDataset(Dataset):
    """Video dataset over `<data_dir>/real` and `<data_dir>/fake` folders.

    Labels: 0 = real, 1 = fake. Each item is a [T, 3, IMG_SIZE, IMG_SIZE]
    frame tensor plus its label; videos where no face is found yield a
    zero tensor with label -1 so the training loop can filter them out.
    """

    VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv')

    def __init__(self, data_dir, sequence_length=SEQUENCE_LENGTH_DEFAULT,
                 max_videos_per_class=400):
        """
        Args:
            data_dir: Root folder containing 'real' and 'fake' subfolders.
            sequence_length: Frames sampled per video.
            max_videos_per_class: Per-class cap (generalizes the previously
                hard-coded 400); pass None for no limit.
        """
        self.data_dir = data_dir
        self.sequence_length = sequence_length
        self.video_files = []
        self.labels = []

        print(f" Scanning for videos in {data_dir}...")

        # real -> label 0, fake -> label 1 (same order as before: real first)
        for subfolder, label in (('real', 0), ('fake', 1)):
            videos = self._find_videos(os.path.join(data_dir, subfolder))
            if max_videos_per_class is not None:
                videos = videos[:max_videos_per_class]
            self.video_files.extend(videos)
            self.labels.extend([label] * len(videos))

        self.total_videos = len(self.video_files)
        print(f" Total dataset size: {self.total_videos} videos")

    @classmethod
    def _find_videos(cls, folder_path):
        """Recursively collect video file paths under folder_path."""
        video_paths = []
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if file.lower().endswith(cls.VIDEO_EXTS):
                    video_paths.append(os.path.join(root, file))
        return video_paths

    def __len__(self):
        return len(self.video_files)

    def __getitem__(self, idx):
        video_path = self.video_files[idx]
        label = self.labels[idx]

        frames = extract_frames_from_video(video_path, self.sequence_length)

        # Sentinel for "no face found": zero frames and label -1.
        if frames is None:
            return torch.zeros((self.sequence_length, 3, IMG_SIZE, IMG_SIZE)), -1

        return frames, torch.tensor(label, dtype=torch.long)
228
+
229
if __name__ == "__main__":
    # Smoke test: scan the default data directory and print its size.
    ds = DeepfakeDataset('data/')
model.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import models
4
+
5
class FeatureExtractor(nn.Module):
    """Frame-level spatial feature extractor built on a pretrained ResNeXt50.

    The backbone's classification head is replaced by Identity so the
    forward pass yields the pooled feature vector for each frame.
    """

    def __init__(self, freeze=True):
        super(FeatureExtractor, self).__init__()

        # ImageNet-pretrained ResNeXt50 via the new torchvision weights API.
        self.model = models.resnext50_32x4d(weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V2)

        if freeze:
            # Keep the backbone fixed; only downstream layers will train.
            for param in self.model.parameters():
                param.requires_grad = False

        # Width of the vector feeding the (about-to-be-removed) classifier.
        self.feature_dim = self.model.fc.in_features

        # Drop the 1000-class ImageNet head; Identity is a pass-through.
        self.model.fc = nn.Identity()

    def forward(self, x):
        """Map frames [B*T, C, H, W] to features [B*T, feature_dim]."""
        return self.model(x)
33
+
34
class DeepfakeDetector(nn.Module):
    """
    Combines the CNN extractor and LSTM sequencer to classify a video.
    """
    def __init__(self, cnn_feature_dim, lstm_hidden_size=512, lstm_layers=2, num_classes=2, dropout=0.5):
        """
        Args:
            cnn_feature_dim (int): The output dimension from our FeatureExtractor (e.g., 2048 for ResNeXt50)
            lstm_hidden_size (int): The number of features in the LSTM's hidden state.
            lstm_layers (int): The number of stacked LSTM layers.
            num_classes (int): The number of output classes (2: Real/Fake).
            dropout (float): Dropout probability for regularization.
        """
        super(DeepfakeDetector, self).__init__()

        # NOTE(review): the extractor is built internally while its output
        # width arrives separately as cnn_feature_dim — the caller must keep
        # the two consistent (app.py probes FeatureExtractor for the value).
        self.feature_extractor = FeatureExtractor(freeze=True)
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layers = lstm_layers

        # --- Sequence Modeling (LSTM) ---
        # The LSTM will take the CNN features for each frame as input
        self.lstm = nn.LSTM(
            input_size=cnn_feature_dim,
            hidden_size=lstm_hidden_size,
            num_layers=lstm_layers,
            batch_first=True,  # Input shape is [BatchSize, SeqLength, Features]
            bidirectional=True,  # It will look at the sequence forwards and backwards
            dropout=dropout if lstm_layers > 1 else 0  # inter-layer dropout requires >1 layer
        )

        # --- Classification Head ---
        # We'll build a small classifier on top of the LSTM's output
        self.fc1 = nn.Linear(
            lstm_hidden_size * 2,  # * 2 because the LSTM is bidirectional
            lstm_hidden_size // 2
        )
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(lstm_hidden_size // 2, num_classes)  # Final output: 2 classes

    def forward(self, x):
        """Classify a batch of frame sequences.

        Args:
            x: Float tensor [B, T, C, H, W] of normalized face crops.
        Returns:
            Logits of shape [B, num_classes].
        """
        batch_size, seq_len, c, h, w = x.shape

        # --- 1. Feature Extraction (CNN) ---
        # Fold time into the batch so every frame goes through the CNN at once:
        # reshape [B, T, C, H, W] -> [B*T, C, H, W].
        x_flat = x.view(batch_size * seq_len, c, h, w)

        features = self.feature_extractor(x_flat)
        # 'features' now has shape [B * T, cnn_feature_dim]

        # --- 2. Sequence Modeling (LSTM) ---
        # Reshape features back into sequences: [B, T, cnn_feature_dim]
        features_seq = features.view(batch_size, seq_len, -1)

        # lstm_out shape: [B, T, 2 * lstm_hidden_size] (because bidirectional)
        # h_n, c_n are the final hidden/cell states, which we don't need here
        lstm_out, (h_n, c_n) = self.lstm(features_seq)

        # Use the output from the *last* time step for classification.
        # NOTE(review): for the backward direction, index -1 is that
        # direction's *first* step; concatenating the final hidden states of
        # both directions would be the conventional alternative — left
        # unchanged to stay compatible with the trained weights.
        last_time_step_out = lstm_out[:, -1, :]
        # Shape is now [B, 2 * lstm_hidden_size]

        # --- 3. Classification ---
        # Pass the LSTM's final output through our classifier
        x = self.dropout(self.relu(self.fc1(last_time_step_out)))
        out = self.fc2(x)
        # 'out' shape: [B, num_classes] (e.g., [8, 2])

        return out
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Backend API ---
2
+ fastapi==0.111.0
3
+ uvicorn==0.30.1
4
+ python-multipart==0.0.9
5
+
6
+ # --- Core ML Libraries (Stable for Py 3.11) ---
7
+ # pip does not support --index-url appended to an individual requirement;
+ # it must appear on its own line. --extra-index-url keeps PyPI available
+ # for the remaining packages while pulling CPU wheels for torch.
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ torch
+ torchvision
9
+
10
+ # --- Face Detection & Processing ---
11
+ facenet-pytorch==2.5.3
12
+ opencv-python-headless==4.10.0.84
13
+ numpy==1.26.4
14
+ pandas==2.2.2
15
+ scikit-learn==1.5.1
16
+ matplotlib==3.8.2
17
+
18
+ # --- AI Text Detection (ModernBERT requires >= 4.48.0) ---
19
+ transformers>=4.48.0
20
+ huggingface-hub>=0.20.0
21
+ accelerate>=0.26.0
slop_detector.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Text Detector Integration Module
3
+
4
+ This module integrates the slop-detector-bert model from Hugging Face
5
+ for detecting AI-generated text content. It can be used in combination
6
+ with the deepfake video detector for multi-modal analysis.
7
+
8
+ Model: gouwsxander/slop-detector-bert
9
+ - BERT-based classifier for detecting AI-generated text
10
+ - Trained on Wikipedia human-written vs AI-rewritten paragraphs
11
+ - Labels: LABEL_0 (HUMAN), LABEL_1 (AI)
12
+ - This is a PEFT/LoRA adapter on bert-base-cased
13
+ """
14
+
15
+ import torch
16
+ import torch.nn.functional as F
17
+ from typing import Optional
18
+ from dataclasses import dataclass
19
+
20
+
21
@dataclass
class SlopDetectionResult:
    """Result from AI text detection.

    Produced by SlopDetector.detect(); confidence is a percentage in [0, 100].
    """

    # Text that was classified (detect() stores at most 500 chars plus "...").
    text: str
    # Predicted class: "HUMAN", "AI", or "UNKNOWN" for empty/whitespace input.
    label: str  # "HUMAN" or "AI"
    # Probability of the predicted class, scaled to a percentage.
    confidence: float
    # Convenience flag: True exactly when label == "AI" (class id 1).
    is_ai_generated: bool
28
+
29
+
30
+ class SlopDetector:
31
+
32
+ # Using the requested ModernBERT model
33
+ # Note: ModernBERT requires transformers >= 4.48.0
34
+ MODEL_NAME = "AICodexLab/answerdotai-ModernBERT-base-ai-detector"
35
+
36
+ def __init__(self, device: Optional[str] = None):
37
+
38
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
39
+ self._model = None
40
+ self._tokenizer = None
41
+ self._loaded = False
42
+
43
+ def load_model(self) -> None:
44
+ """Lazily load the model from Hugging Face."""
45
+ if self._loaded:
46
+ return
47
+
48
+ try:
49
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
50
+
51
+ print(f"Loading ModernBERT detector on {self.device}...")
52
+
53
+ # Load tokenizer
54
+ self._tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
55
+
56
+ # Load model
57
+ self._model = AutoModelForSequenceClassification.from_pretrained(
58
+ self.MODEL_NAME,
59
+ num_labels=2,
60
+ trust_remote_code=True
61
+ )
62
+ self._model = self._model.to(self.device)
63
+ self._model.eval()
64
+
65
+ self._loaded = True
66
+ print("ModernBERT detector loaded successfully!")
67
+
68
+ except Exception as e:
69
+ print(f"Error loading ModernBERT detector: {e}")
70
+ print("Tip: Ensure you have transformers>=4.48.0 installed.")
71
+ raise
72
+
73
+ def detect(self, text: str) -> SlopDetectionResult:
74
+
75
+ self.load_model()
76
+
77
+ if not text or not text.strip():
78
+ return SlopDetectionResult(
79
+ text=text,
80
+ label="UNKNOWN",
81
+ confidence=0.0,
82
+ is_ai_generated=False
83
+ )
84
+
85
+ # Tokenize with truncation
86
+ inputs = self._tokenizer(
87
+ text,
88
+ return_tensors="pt",
89
+ max_length=512,
90
+ truncation=True,
91
+ padding=True
92
+ )
93
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
94
+
95
+ with torch.no_grad():
96
+ outputs = self._model(**inputs)
97
+ probabilities = F.softmax(outputs.logits, dim=-1)
98
+
99
+ # Get prediction
100
+ predicted_class_id = probabilities.argmax().item()
101
+ confidence = probabilities[0, predicted_class_id].item() * 100
102
+
103
+ # LABEL_1 = AI, LABEL_0 = HUMAN
104
+ is_ai = predicted_class_id == 1
105
+ label = "AI" if is_ai else "HUMAN"
106
+
107
+ return SlopDetectionResult(
108
+ text=text[:500] + "..." if len(text) > 500 else text,
109
+ label=label,
110
+ confidence=confidence,
111
+ is_ai_generated=is_ai
112
+ )
113
+
114
+ def detect_batch(self, texts: list[str]) -> list[SlopDetectionResult]:
115
+ self.load_model()
116
+
117
+ results = []
118
+ for text in texts:
119
+ results.append(self.detect(text))
120
+
121
+ return results
122
+
123
+ def analyze_paragraphs(self, full_text: str) -> dict:
124
+ self.load_model()
125
+
126
+ # Split into paragraphs
127
+ paragraphs = [p.strip() for p in full_text.split('\n') if len(p.strip()) > 20]
128
+
129
+ if not paragraphs:
130
+ return {
131
+ "overall_label": "UNKNOWN",
132
+ "overall_confidence": 0.0,
133
+ "ai_probability": 0.0,
134
+ "paragraph_count": 0,
135
+ "ai_paragraph_count": 0,
136
+ "details": []
137
+ }
138
+
139
+ # Analyze each paragraph
140
+ paragraph_results = self.detect_batch(paragraphs)
141
+
142
+ # Calculate aggregate metrics
143
+ ai_count = sum(1 for r in paragraph_results if r.is_ai_generated)
144
+ ai_confidences = [r.confidence for r in paragraph_results if r.is_ai_generated]
145
+ human_confidences = [r.confidence for r in paragraph_results if not r.is_ai_generated]
146
+
147
+ # Overall probability based on paragraph analysis
148
+ ai_probability = (ai_count / len(paragraphs)) * 100
149
+
150
+ # Determine overall label (majority vote with confidence weighting)
151
+ if ai_count > len(paragraphs) / 2:
152
+ overall_label = "AI"
153
+ overall_confidence = sum(ai_confidences) / len(ai_confidences) if ai_confidences else 0
154
+ else:
155
+ overall_label = "HUMAN"
156
+ overall_confidence = sum(human_confidences) / len(human_confidences) if human_confidences else 0
157
+
158
+ return {
159
+ "overall_label": overall_label,
160
+ "overall_confidence": round(overall_confidence, 2),
161
+ "ai_probability": round(ai_probability, 2),
162
+ "paragraph_count": len(paragraphs),
163
+ "ai_paragraph_count": ai_count,
164
+ "details": [
165
+ {
166
+ "paragraph_preview": r.text[:100] + "..." if len(r.text) > 100 else r.text,
167
+ "label": r.label,
168
+ "confidence": round(r.confidence, 2)
169
+ }
170
+ for r in paragraph_results
171
+ ]
172
+ }
173
+
174
+
175
# Module-level singleton so every caller shares one loaded model.
_detector_instance: Optional[SlopDetector] = None


def get_slop_detector() -> SlopDetector:
    """Return the shared SlopDetector, constructing it lazily on first call."""
    global _detector_instance
    _detector_instance = _detector_instance or SlopDetector()
    return _detector_instance
185
+
186
+
187
def detect_ai_text(text: str) -> SlopDetectionResult:
    """Classify *text* with the shared detector; see SlopDetector.detect."""
    return get_slop_detector().detect(text)
191
+
192
+
193
def analyze_text_content(text: str) -> dict:
    """Run paragraph-level AI analysis of *text* with the shared detector."""
    return get_slop_detector().analyze_paragraphs(text)
197
+
198
+
199
# Quick manual smoke test: run this module directly to compare the detector's
# verdict on a human-written passage vs an AI-polished rewrite of it.
if __name__ == "__main__":
    human_sample = (
        "Born in Bristol and raised in Glastonbury to an English father and Belgian mother, "
        "Norris began competitive kart racing aged eight. After a successful karting career, "
        "which culminated in his victory at the direct-drive World Championship in 2014, "
        "Norris graduated to junior formulae."
    )
    ai_sample = (
        "Born in Bristol and raised in Glastonbury to an English father and a Belgian mother, "
        "Norris began competing in karting at the age of eight. He enjoyed a successful karting "
        "career, culminating in his victory at the direct-drive World Championship in 2014, "
        "before progressing into the junior single-seater categories."
    )
    test_texts = [human_sample, ai_sample]

    banner = "=" * 60
    print(banner)
    print("AI Text Detection Test")
    print(banner)

    detector = SlopDetector()

    for index, sample in enumerate(test_texts, start=1):
        verdict = detector.detect(sample)
        print(f"\nText {index}:")
        print(f" Preview: {sample[:80]}...")
        print(f" Label: {verdict.label}")
        print(f" Confidence: {verdict.confidence:.2f}%")
        print(f" Is AI Generated: {verdict.is_ai_generated}")