First
Browse files- Dockerfile +24 -0
- app.py +488 -0
- dataset.py +230 -0
- model.py +110 -0
- requirements.txt +21 -0
- slop_detector.py +228 -0
Dockerfile
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Container image for the deepfake-detection FastAPI backend (Cloud Run target).
FROM python:3.11-slim-bookworm

# System deps for OpenCV, etc.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps
# (requirements.txt copied alone first so this layer is cached across code edits)
COPY requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy code + models
COPY . .

# Cloud Run will set PORT env; default to 8080 if not set
ENV PORT=8080
EXPOSE 8080

# Use sh -c so ${PORT} is expanded by the shell
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT:-8080}"]
app.py
ADDED
|
@@ -0,0 +1,488 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException, Form
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from typing import Optional
|
| 5 |
+
from contextlib import asynccontextmanager
|
| 6 |
+
import torch
|
| 7 |
+
import os
|
| 8 |
+
import shutil
|
| 9 |
+
import tempfile
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
from model import DeepfakeDetector, FeatureExtractor
|
| 14 |
+
from dataset import extract_frames_from_video, process_image
|
| 15 |
+
from slop_detector import SlopDetector, detect_ai_text, analyze_text_content
|
| 16 |
+
|
| 17 |
+
BASE_DIR = Path(__file__).resolve().parent
|
| 18 |
+
SEQUENCE_LENGTH = 10
|
| 19 |
+
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 20 |
+
|
| 21 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Eagerly warm both ML models at startup; nothing to tear down on exit."""
    print("Startup: Pre-loading default models to avoid delay...")
    try:
        load_model_if_needed()          # video deepfake model
        load_slop_detector_if_needed()  # AI-text (slop) model
        print("Startup: All models loaded and ready!")
    except Exception as e:
        # Startup must not crash the server; endpoints lazy-load on demand.
        print(f"Startup Warning: Could not pre-load models: {e}")

    yield

    print("Shutdown: Cleaning up...")
| 40 |
+
app = FastAPI(lifespan=lifespan)

# Browser Origin headers never carry a trailing slash, so entries here must
# not either (a trailing slash makes the origin never match).
allowed_origins = [
    "http://localhost:5173",  # local vite
    "http://localhost:8080",  # if you're using that
    "https://deepfake-detection-lime.vercel.app",  # production frontend
]
app.add_middleware(
    CORSMiddleware,
    # Fix: allow_origins=["*"] together with allow_credentials=True is
    # rejected by browsers — the CORS spec forbids a wildcard
    # Access-Control-Allow-Origin on credentialed responses. Use the explicit
    # allow-list (which was previously defined but never used).
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 54 |
+
|
| 55 |
+
# --- Model Paths ---
SAVED_MODEL_PATH = BASE_DIR / "saved_models" / "deepfake_detector_best.pth"

# Lazily-initialized globals for the video deepfake model.
model = None
feature_dim = None
model_error: str | None = None  # last load failure message, or None

# Slop detector for AI text detection (also lazily initialized)
slop_detector = None
slop_detector_error: str | None = None


# Pydantic models for request/response
class TextAnalysisRequest(BaseModel):
    # Raw text to classify as AI-generated vs human-written.
    text: str


class TextAnalysisResponse(BaseModel):
    # NOTE(review): declared but not wired up as a response_model on any
    # endpoint in this file — verify whether it is used elsewhere before
    # removing.
    status: str
    label: str
    confidence: float
    is_ai_generated: bool
    details: Optional[dict] = None
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def load_model_if_needed():
    """Load the video deepfake model once, caching it in module globals.

    On failure ``model`` stays ``None`` and ``model_error`` records the
    reason, so endpoints can respond 503 instead of crashing. Safe to call
    repeatedly; subsequent calls after a failure retry the load.
    """
    global model, feature_dim, model_error

    if model is not None:
        return  # already loaded

    print("Loading deepfake model lazily on first request...")
    try:
        # Instantiate the backbone only to discover its output feature size.
        temp_cnn = FeatureExtractor(freeze=True)
        feature_dim_local = temp_cnn.feature_dim
        del temp_cnn  # free the throwaway network before building the real one

        m = DeepfakeDetector(
            cnn_feature_dim=feature_dim_local,
            lstm_hidden_size=512,
            lstm_layers=2,
        ).to(DEVICE)

        if not os.path.exists(SAVED_MODEL_PATH):
            err = f"Model file not found at: {SAVED_MODEL_PATH}"
            print("Error:", err)
            model_error = err
            return

        # weights_only=True restricts unpickling to tensors/containers,
        # preventing arbitrary code execution from a tampered checkpoint.
        # (Requires torch >= 1.13; a state_dict loads fine under it.)
        state = torch.load(SAVED_MODEL_PATH, map_location=DEVICE, weights_only=True)
        m.load_state_dict(state)
        m.eval()

        # Publish to module globals only after a fully successful load; the
        # `global` declaration above makes plain assignment sufficient.
        model_error = None
        model = m
        feature_dim = feature_dim_local

        print("Model loaded successfully!")
    except Exception as e:
        model_error = str(e)
        print(f"Error loading model: {e}")
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def load_slop_detector_if_needed():
    """Lazily construct and cache the AI-text (slop) detector in module globals."""
    global slop_detector, slop_detector_error

    if slop_detector is not None:
        return  # already loaded

    print("Loading slop detector for AI text detection...")
    try:
        det = SlopDetector(device=str(DEVICE))
        det.load_model()
    except Exception as e:
        slop_detector_error = str(e)
        print(f"Error loading slop detector: {e}")
    else:
        # Publish only after a fully successful load.
        slop_detector_error = None
        slop_detector = det
        print("Slop detector loaded successfully!")
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@app.get("/")
|
| 140 |
+
def root():
|
| 141 |
+
return {"message": "Deepfake detector backend running"}
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
@app.get("/health")
|
| 145 |
+
def health():
|
| 146 |
+
status_info = {}
|
| 147 |
+
|
| 148 |
+
# Check deepfake model status
|
| 149 |
+
if model_error is not None:
|
| 150 |
+
status_info["deepfake_model"] = {"status": "error", "detail": model_error}
|
| 151 |
+
elif model is None:
|
| 152 |
+
status_info["deepfake_model"] = {"status": "not_loaded_yet"}
|
| 153 |
+
else:
|
| 154 |
+
status_info["deepfake_model"] = {"status": "ok"}
|
| 155 |
+
|
| 156 |
+
# Check slop detector status
|
| 157 |
+
if slop_detector_error is not None:
|
| 158 |
+
status_info["slop_detector"] = {"status": "error", "detail": slop_detector_error}
|
| 159 |
+
elif slop_detector is None:
|
| 160 |
+
status_info["slop_detector"] = {"status": "not_loaded_yet"}
|
| 161 |
+
else:
|
| 162 |
+
status_info["slop_detector"] = {"status": "ok"}
|
| 163 |
+
|
| 164 |
+
overall_status = "ok"
|
| 165 |
+
if model_error or slop_detector_error:
|
| 166 |
+
overall_status = "partial_error"
|
| 167 |
+
elif model is None and slop_detector is None:
|
| 168 |
+
overall_status = "models_not_loaded_yet"
|
| 169 |
+
|
| 170 |
+
return {"status": overall_status, "models": status_info}
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
@app.post("/predict")
|
| 174 |
+
async def predict_video(file: UploadFile = File(...)):
|
| 175 |
+
# Lazy load model on first request
|
| 176 |
+
load_model_if_needed()
|
| 177 |
+
|
| 178 |
+
if model is None:
|
| 179 |
+
# loading failed
|
| 180 |
+
raise HTTPException(
|
| 181 |
+
status_code=503,
|
| 182 |
+
detail=f"Model not available on server. Error: {model_error}",
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
if not file.filename.lower().endswith((".mp4", ".mov", ".avi")):
|
| 186 |
+
raise HTTPException(
|
| 187 |
+
status_code=400,
|
| 188 |
+
detail="Invalid file type. Please upload .mp4, .mov, or .avi",
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
# Save uploaded file to temp path
|
| 192 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
|
| 193 |
+
shutil.copyfileobj(file.file, temp_file)
|
| 194 |
+
temp_file_path = temp_file.name
|
| 195 |
+
|
| 196 |
+
try:
|
| 197 |
+
frames_tensor = extract_frames_from_video(
|
| 198 |
+
video_path=temp_file_path,
|
| 199 |
+
sequence_length=SEQUENCE_LENGTH,
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
if frames_tensor is None:
|
| 203 |
+
return {
|
| 204 |
+
"status": "error",
|
| 205 |
+
"message": "Could not detect a face in the video.",
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)
|
| 209 |
+
|
| 210 |
+
with torch.no_grad():
|
| 211 |
+
output = model(frames_tensor)
|
| 212 |
+
probabilities = F.softmax(output, dim=1)
|
| 213 |
+
confidence, predicted_class = torch.max(probabilities, 1)
|
| 214 |
+
|
| 215 |
+
prediction_idx = predicted_class.item()
|
| 216 |
+
conf_score = confidence.item() * 100
|
| 217 |
+
result_label = "FAKE" if prediction_idx == 1 else "REAL"
|
| 218 |
+
|
| 219 |
+
return {
|
| 220 |
+
"status": "success",
|
| 221 |
+
"filename": file.filename,
|
| 222 |
+
"prediction": result_label,
|
| 223 |
+
"confidence": round(conf_score, 2),
|
| 224 |
+
"is_fake": prediction_idx == 1,
|
| 225 |
+
}
|
| 226 |
+
except Exception as e:
|
| 227 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 228 |
+
finally:
|
| 229 |
+
os.remove(temp_file_path)
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
@app.post("/analyze-image")
|
| 233 |
+
async def analyze_image(file: UploadFile = File(...)):
|
| 234 |
+
# Lazy load model on first request
|
| 235 |
+
load_model_if_needed()
|
| 236 |
+
|
| 237 |
+
if model is None:
|
| 238 |
+
raise HTTPException(
|
| 239 |
+
status_code=503,
|
| 240 |
+
detail=f"Model not available on server. Error: {model_error}",
|
| 241 |
+
)
|
| 242 |
+
|
| 243 |
+
if not file.filename.lower().endswith((".jpg", ".jpeg", ".png", ".webp")):
|
| 244 |
+
raise HTTPException(
|
| 245 |
+
status_code=400,
|
| 246 |
+
detail="Invalid file type. Please upload .jpg, .jpeg, .png, or .webp",
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
# Save uploaded file to temp path
|
| 250 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
|
| 251 |
+
shutil.copyfileobj(file.file, temp_file)
|
| 252 |
+
temp_file_path = temp_file.name
|
| 253 |
+
|
| 254 |
+
try:
|
| 255 |
+
# Use the new process_image function
|
| 256 |
+
# This will return a tensor of shape [SEQUENCE_LENGTH, 3, 224, 224]
|
| 257 |
+
# essentially treating the image as a static video
|
| 258 |
+
frames_tensor = process_image(
|
| 259 |
+
image_path=temp_file_path,
|
| 260 |
+
sequence_length=SEQUENCE_LENGTH,
|
| 261 |
+
)
|
| 262 |
+
|
| 263 |
+
if frames_tensor is None:
|
| 264 |
+
return {
|
| 265 |
+
"status": "error",
|
| 266 |
+
"message": "Could not detect a face in the image.",
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)
|
| 270 |
+
|
| 271 |
+
with torch.no_grad():
|
| 272 |
+
output = model(frames_tensor)
|
| 273 |
+
probabilities = F.softmax(output, dim=1)
|
| 274 |
+
confidence, predicted_class = torch.max(probabilities, 1)
|
| 275 |
+
|
| 276 |
+
prediction_idx = predicted_class.item()
|
| 277 |
+
conf_score = confidence.item() * 100
|
| 278 |
+
result_label = "FAKE" if prediction_idx == 1 else "REAL"
|
| 279 |
+
|
| 280 |
+
return {
|
| 281 |
+
"status": "success",
|
| 282 |
+
"filename": file.filename,
|
| 283 |
+
"prediction": result_label,
|
| 284 |
+
"confidence": round(conf_score, 2),
|
| 285 |
+
"is_fake": prediction_idx == 1,
|
| 286 |
+
"type": "image_analysis"
|
| 287 |
+
}
|
| 288 |
+
except Exception as e:
|
| 289 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 290 |
+
finally:
|
| 291 |
+
if os.path.exists(temp_file_path):
|
| 292 |
+
os.remove(temp_file_path)
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
@app.post("/analyze-text")
|
| 296 |
+
async def analyze_text(request: TextAnalysisRequest):
|
| 297 |
+
load_slop_detector_if_needed()
|
| 298 |
+
|
| 299 |
+
if slop_detector is None:
|
| 300 |
+
raise HTTPException(
|
| 301 |
+
status_code=503,
|
| 302 |
+
detail=f"Slop detector not available. Error: {slop_detector_error}",
|
| 303 |
+
)
|
| 304 |
+
|
| 305 |
+
try:
|
| 306 |
+
result = slop_detector.detect(request.text)
|
| 307 |
+
|
| 308 |
+
return {
|
| 309 |
+
"status": "success",
|
| 310 |
+
"label": result.label,
|
| 311 |
+
"confidence": round(result.confidence, 2),
|
| 312 |
+
"is_ai_generated": result.is_ai_generated,
|
| 313 |
+
}
|
| 314 |
+
except Exception as e:
|
| 315 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
@app.post("/analyze-text-detailed")
|
| 319 |
+
async def analyze_text_detailed(request: TextAnalysisRequest):
|
| 320 |
+
load_slop_detector_if_needed()
|
| 321 |
+
|
| 322 |
+
if slop_detector is None:
|
| 323 |
+
raise HTTPException(
|
| 324 |
+
status_code=503,
|
| 325 |
+
detail=f"Slop detector not available. Error: {slop_detector_error}",
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
try:
|
| 329 |
+
analysis = slop_detector.analyze_paragraphs(request.text)
|
| 330 |
+
|
| 331 |
+
return {
|
| 332 |
+
"status": "success",
|
| 333 |
+
**analysis
|
| 334 |
+
}
|
| 335 |
+
except Exception as e:
|
| 336 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
@app.post("/predict-combined")
|
| 340 |
+
async def predict_combined(
|
| 341 |
+
file: UploadFile = File(...),
|
| 342 |
+
context_text: Optional[str] = Form(None),
|
| 343 |
+
):
|
| 344 |
+
# Load both models
|
| 345 |
+
load_model_if_needed()
|
| 346 |
+
|
| 347 |
+
if model is None:
|
| 348 |
+
raise HTTPException(
|
| 349 |
+
status_code=503,
|
| 350 |
+
detail=f"Deepfake model not available. Error: {model_error}",
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
+
if not file.filename.lower().endswith((".mp4", ".mov", ".avi")):
|
| 354 |
+
raise HTTPException(
|
| 355 |
+
status_code=400,
|
| 356 |
+
detail="Invalid file type. Please upload .mp4, .mov, or .avi",
|
| 357 |
+
)
|
| 358 |
+
|
| 359 |
+
# Save uploaded file to temp path
|
| 360 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
|
| 361 |
+
shutil.copyfileobj(file.file, temp_file)
|
| 362 |
+
temp_file_path = temp_file.name
|
| 363 |
+
|
| 364 |
+
try:
|
| 365 |
+
# --- Video Deepfake Detection ---
|
| 366 |
+
frames_tensor = extract_frames_from_video(
|
| 367 |
+
video_path=temp_file_path,
|
| 368 |
+
sequence_length=SEQUENCE_LENGTH,
|
| 369 |
+
)
|
| 370 |
+
|
| 371 |
+
if frames_tensor is None:
|
| 372 |
+
video_result = {
|
| 373 |
+
"status": "error",
|
| 374 |
+
"message": "Could not detect a face in the video.",
|
| 375 |
+
"prediction": None,
|
| 376 |
+
"confidence": None,
|
| 377 |
+
"is_fake": None,
|
| 378 |
+
}
|
| 379 |
+
else:
|
| 380 |
+
frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)
|
| 381 |
+
|
| 382 |
+
with torch.no_grad():
|
| 383 |
+
output = model(frames_tensor)
|
| 384 |
+
probabilities = F.softmax(output, dim=1)
|
| 385 |
+
confidence, predicted_class = torch.max(probabilities, 1)
|
| 386 |
+
|
| 387 |
+
prediction_idx = predicted_class.item()
|
| 388 |
+
conf_score = confidence.item() * 100
|
| 389 |
+
result_label = "FAKE" if prediction_idx == 1 else "REAL"
|
| 390 |
+
|
| 391 |
+
video_result = {
|
| 392 |
+
"status": "success",
|
| 393 |
+
"prediction": result_label,
|
| 394 |
+
"confidence": round(conf_score, 2),
|
| 395 |
+
"is_fake": prediction_idx == 1,
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
# --- Text Context Analysis (if provided) ---
|
| 399 |
+
text_result = None
|
| 400 |
+
if context_text and context_text.strip():
|
| 401 |
+
load_slop_detector_if_needed()
|
| 402 |
+
|
| 403 |
+
if slop_detector is not None:
|
| 404 |
+
text_analysis = slop_detector.analyze_paragraphs(context_text)
|
| 405 |
+
text_result = {
|
| 406 |
+
"status": "success",
|
| 407 |
+
"overall_label": text_analysis["overall_label"],
|
| 408 |
+
"overall_confidence": text_analysis["overall_confidence"],
|
| 409 |
+
"ai_probability": text_analysis["ai_probability"],
|
| 410 |
+
"paragraph_count": text_analysis["paragraph_count"],
|
| 411 |
+
"ai_paragraph_count": text_analysis["ai_paragraph_count"],
|
| 412 |
+
}
|
| 413 |
+
else:
|
| 414 |
+
text_result = {
|
| 415 |
+
"status": "error",
|
| 416 |
+
"message": f"Slop detector not available: {slop_detector_error}"
|
| 417 |
+
}
|
| 418 |
+
|
| 419 |
+
# --- Combined Assessment ---
|
| 420 |
+
combined_verdict = determine_combined_verdict(video_result, text_result)
|
| 421 |
+
|
| 422 |
+
return {
|
| 423 |
+
"status": "success",
|
| 424 |
+
"filename": file.filename,
|
| 425 |
+
"video_analysis": video_result,
|
| 426 |
+
"text_analysis": text_result,
|
| 427 |
+
"combined_verdict": combined_verdict,
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
except Exception as e:
|
| 431 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 432 |
+
finally:
|
| 433 |
+
if os.path.exists(temp_file_path):
|
| 434 |
+
os.remove(temp_file_path)
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
def determine_combined_verdict(video_result: dict, text_result: Optional[dict]) -> dict:
    """Fuse the video and (optional) text analyses into one risk verdict.

    ``text_result`` may be None (no context supplied) or an error payload;
    in both cases the text signal is treated as unknown. Returns a dict with
    ``verdict``, ``severity`` and a human-readable ``explanation``.
    """
    video_is_fake = video_result.get("is_fake")
    video_conf = video_result.get("confidence", 0)

    # Text signal: True = AI-written, False = human-written, None = unknown.
    text_is_ai = None
    text_conf = None
    if text_result and text_result.get("status") == "success":
        text_is_ai = text_result.get("overall_label") == "AI"
        text_conf = text_result.get("overall_confidence", 0)

    # Video leg failed (no face) -> inconclusive, optionally noting the text.
    if video_result.get("status") == "error":
        suffix = ""
        if text_is_ai is not None:
            kind = "AI-generated" if text_is_ai else "human-written"
            suffix = f"Text appears {kind}."
        return {
            "verdict": "INCONCLUSIVE",
            "severity": "unknown",
            "explanation": "Could not analyze video (no face detected). " + suffix,
        }

    if video_is_fake:
        if text_is_ai:
            return {
                "verdict": "HIGH_RISK_DEEPFAKE",
                "severity": "high",
                "explanation": f"Video detected as FAKE ({video_conf:.1f}% confidence) AND associated text appears AI-generated ({text_conf:.1f}% confidence). This combination suggests sophisticated manipulation."
            }
        if text_is_ai is False:
            return {
                "verdict": "DEEPFAKE_DETECTED",
                "severity": "high",
                "explanation": f"Video detected as FAKE ({video_conf:.1f}% confidence). Associated text appears human-written."
            }
        return {
            "verdict": "DEEPFAKE_DETECTED",
            "severity": "high",
            "explanation": f"Video detected as FAKE ({video_conf:.1f}% confidence). No text context provided for additional analysis."
        }

    if text_is_ai:
        return {
            "verdict": "SUSPICIOUS_CONTEXT",
            "severity": "medium",
            "explanation": f"Video appears REAL ({video_conf:.1f}% confidence), but associated text appears AI-generated ({text_conf:.1f}% confidence). Context may be misleading."
        }

    tail = ""
    if text_is_ai is False:
        tail = f" Associated text appears human-written ({text_conf:.1f}% confidence)."
    return {
        "verdict": "LIKELY_AUTHENTIC",
        "severity": "low",
        "explanation": f"Video appears REAL ({video_conf:.1f}% confidence)." + tail,
    }
dataset.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
import os
import torch
import numpy as np
from torch.utils.data import Dataset
from torchvision import transforms
# Import Facenet-PyTorch for Face Detection (No TensorFlow needed)
from facenet_pytorch import MTCNN

# --- 1. CONFIGURATION ---
# 10 frames is enough for a resume project and runs faster on CPU
SEQUENCE_LENGTH_DEFAULT = 10
IMG_SIZE = 224  # spatial resolution fed to the CNN backbone
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- 2. INITIALIZE MTCNN ---
# Module-level singleton: constructing MTCNN is expensive, so one instance is
# shared by extract_frames_from_video / process_image below.
print(f"Initializing MTCNN on {DEVICE}...")
# keep_all=True makes detect() return every face; the callers sort by
# confidence and keep the best one themselves.
mtcnn_detector = MTCNN(keep_all=True, device=DEVICE)

# Standard normalization (ImageNet mean/std, matching the pretrained backbone)
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
| 30 |
+
|
| 31 |
+
# --- 3. PREPROCESSING FUNCTION ---
|
| 32 |
+
def extract_frames_from_video(video_path, sequence_length=SEQUENCE_LENGTH_DEFAULT):
    """Sample ``sequence_length`` evenly-spaced frames, crop the most confident
    face from each, and return a [seq, 3, IMG_SIZE, IMG_SIZE] tensor.

    Returns None when the video cannot be opened, has no frames, or no face is
    detected in any sampled frame. If detection fails on some frames, the last
    good crop is repeated so the output always has ``sequence_length`` frames.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()  # release defensively even on open failure
        return None

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames <= 0:
        cap.release()  # fix: this early-return path previously leaked the capture
        return None

    processed_frames = []
    frame_indices = np.linspace(0, total_frames - 1, sequence_length, dtype=int)

    try:
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue

            # Convert to RGB for MTCNN (OpenCV is BGR)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            try:
                # boxes given as [x1, y1, x2, y2]
                boxes, probs = mtcnn_detector.detect(frame_rgb)

                if boxes is None or len(boxes) == 0:
                    continue

                # Keep only detections with a valid confidence.
                face_list = [
                    {'box': box, 'conf': prob}
                    for box, prob in zip(boxes, probs)
                    if prob is not None
                ]
                if not face_list:
                    continue

                best_face = max(face_list, key=lambda f: f['conf'])
                x1, y1, x2, y2 = best_face['box']

                w = x2 - x1
                h = y2 - y1

                # Fix negative coordinates and convert floats to ints.
                x, y = max(0, int(x1)), max(0, int(y1))
                w, h = int(w), int(h)

                # Add padding (10%) around the detected box, clamped to bounds.
                pad_w = int(w * 0.1)
                pad_h = int(h * 0.1)

                img_h, img_w, _ = frame.shape
                y_min = max(0, y - pad_h)
                y_max = min(img_h, y + h + pad_h)
                x_min = max(0, x - pad_w)
                x_max = min(img_w, x + w + pad_w)

                # NOTE(review): the crop is taken from the BGR `frame`, not
                # `frame_rgb`. Training data is built by this same function, so
                # inference stays consistent with training — confirm before
                # "fixing" the channel order.
                face_crop = frame[y_min:y_max, x_min:x_max]

                if face_crop.size != 0:
                    processed_frames.append(data_transforms(face_crop))
            except Exception:
                # Best-effort per frame: a bad frame must not abort the clip.
                continue
    finally:
        cap.release()  # fix: guarantee release even if an unexpected error escapes

    if not processed_frames:
        return None

    # Padding if we missed some frames due to detection failure
    while len(processed_frames) < sequence_length:
        processed_frames.append(processed_frames[-1])

    return torch.stack(processed_frames[:sequence_length])
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# --- 3b. IMAGE PROCESSING FUNCTION ---
def process_image(image_path, sequence_length=SEQUENCE_LENGTH_DEFAULT):
    """Crop the best face from a still image and tile it into a static
    [sequence_length, 3, IMG_SIZE, IMG_SIZE] clip.

    Returns None when the image is unreadable, no face is found, or anything
    goes wrong (failures are printed, never raised).
    """
    try:
        frame = cv2.imread(image_path)
        if frame is None:
            return None

        # MTCNN expects RGB; OpenCV loads BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        boxes, probs = mtcnn_detector.detect(frame_rgb)
        if boxes is None or len(boxes) == 0:
            return None

        # Drop detections without a confidence value.
        candidates = [
            {'box': b, 'conf': p} for b, p in zip(boxes, probs) if p is not None
        ]
        if not candidates:
            return None

        top = max(candidates, key=lambda c: c['conf'])
        x1, y1, x2, y2 = top['box']

        box_w, box_h = int(x2 - x1), int(y2 - y1)
        left, top_y = max(0, int(x1)), max(0, int(y1))

        # 10% padding around the detected face, clamped to the image bounds.
        pad_w, pad_h = int(box_w * 0.1), int(box_h * 0.1)

        img_h, img_w, _ = frame.shape
        y_min = max(0, top_y - pad_h)
        y_max = min(img_h, top_y + box_h + pad_h)
        x_min = max(0, left - pad_w)
        x_max = min(img_w, left + box_w + pad_w)

        face_crop = frame[y_min:y_max, x_min:x_max]
        if face_crop.size == 0:
            return None

        single = data_transforms(face_crop)  # [3, IMG_SIZE, IMG_SIZE]
        # Tile the single frame so the sequence model sees a "static video".
        return single.unsqueeze(0).repeat(sequence_length, 1, 1, 1)

    except Exception as e:
        print(f"Error processing image: {e}")
        return None
| 170 |
+
|
| 171 |
+
|
| 172 |
+
# --- 4. DATASET CLASS ---
class DeepfakeDataset(Dataset):
    """Video-level dataset: label 0 = real, 1 = fake.

    Expects ``data_dir`` to contain ``real/`` and ``fake/`` subfolders;
    videos are discovered recursively and each class is capped at 400 files.
    """

    def __init__(self, data_dir, sequence_length=SEQUENCE_LENGTH_DEFAULT):
        self.data_dir = data_dir
        self.sequence_length = sequence_length
        self.video_files = []  # absolute paths, reals first then fakes
        self.labels = []       # parallel list: 0 for real, 1 for fake

        print(f" Scanning for videos in {data_dir}...")

        def find_videos_in_folder(folder_path):
            # Recursive scan for common video container extensions.
            video_paths = []
            for root, dirs, files in os.walk(folder_path):
                for file in files:
                    if file.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
                        video_paths.append(os.path.join(root, file))
            return video_paths

        # --- 1. REAL VIDEOS (Limit 400) ---
        # NOTE(review): truncation keeps os.walk order, which is filesystem-
        # dependent — this is not a random sample; confirm it's intended.
        real_path = os.path.join(data_dir, 'real')
        real_videos = find_videos_in_folder(real_path)

        if len(real_videos) > 400:
            real_videos = real_videos[:400]

        for vid in real_videos:
            self.video_files.append(vid)
            self.labels.append(0)

        # --- 2. FAKE VIDEOS (Limit 400) ---
        fake_path = os.path.join(data_dir, 'fake')
        fake_videos = find_videos_in_folder(fake_path)

        if len(fake_videos) > 400:
            fake_videos = fake_videos[:400]

        for vid in fake_videos:
            self.video_files.append(vid)
            self.labels.append(1)

        self.total_videos = len(self.video_files)
        print(f" Total dataset size: {self.total_videos} videos")

    def __len__(self):
        return len(self.video_files)

    def __getitem__(self, idx):
        video_path = self.video_files[idx]
        label = self.labels[idx]

        frames = extract_frames_from_video(video_path, self.sequence_length)

        if frames is None:
            # Sentinel for unreadable/faceless videos: zero frames, label -1.
            # NOTE(review): -1 is a plain int while the success path returns a
            # torch.long tensor — downstream collate/loss must filter these;
            # verify the training loop does.
            return torch.zeros((self.sequence_length, 3, IMG_SIZE, IMG_SIZE)), -1

        return frames, torch.tensor(label, dtype=torch.long)
| 228 |
+
|
| 229 |
+
if __name__ == "__main__":
    # Smoke test: scan the default data directory and report the counts.
    dataset = DeepfakeDataset('data/')
|
model.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torchvision import models
|
| 4 |
+
|
| 5 |
+
class FeatureExtractor(nn.Module):
    """Per-frame spatial feature extractor backed by a pre-trained ResNeXt50.

    The 1000-way ImageNet classification head is swapped for an identity so
    the module emits the pooled feature vector instead of class logits.
    """

    def __init__(self, freeze=True):
        """
        Args:
            freeze (bool): When True (default), freeze every backbone weight
                so only downstream layers are trained.
        """
        super().__init__()

        # Pre-trained ResNeXt50 backbone via the modern torchvision
        # ``weights=`` API (IMAGENET1K_V2 checkpoint).
        self.model = models.resnext50_32x4d(weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V2)

        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False

        # Width of the pooled vector that fed the (now removed) classifier.
        self.feature_dim = self.model.fc.in_features

        # Pass-through replaces the ImageNet classification layer.
        self.model.fc = nn.Identity()

    def forward(self, x):
        """Map frames of shape [B*T, C, H, W] to features [B*T, feature_dim]."""
        return self.model(x)
|
| 33 |
+
|
| 34 |
+
class DeepfakeDetector(nn.Module):
    """CNN + bidirectional LSTM classifier for video-level real/fake prediction.

    Each frame is embedded by a frozen ResNeXt feature extractor, the frame
    embeddings are modelled as a sequence by a bi-LSTM, and the output at the
    final time step is classified by a small MLP head.
    """

    def __init__(self, cnn_feature_dim, lstm_hidden_size=512, lstm_layers=2, num_classes=2, dropout=0.5):
        """
        Args:
            cnn_feature_dim (int): Output dimension of the FeatureExtractor
                (e.g. 2048 for ResNeXt50).
            lstm_hidden_size (int): Number of features in the LSTM hidden state.
            lstm_layers (int): Number of stacked LSTM layers.
            num_classes (int): Number of output classes (2: Real/Fake).
            dropout (float): Dropout probability for regularization.
        """
        super().__init__()

        self.feature_extractor = FeatureExtractor(freeze=True)
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layers = lstm_layers

        # Sequence model over per-frame CNN features. Bidirectional, so its
        # output width is 2 * lstm_hidden_size. PyTorch only applies
        # inter-layer dropout when there is more than one layer.
        self.lstm = nn.LSTM(
            input_size=cnn_feature_dim,
            hidden_size=lstm_hidden_size,
            num_layers=lstm_layers,
            batch_first=True,
            bidirectional=True,
            dropout=dropout if lstm_layers > 1 else 0,
        )

        # Small MLP classification head on top of the LSTM output.
        self.fc1 = nn.Linear(lstm_hidden_size * 2, lstm_hidden_size // 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(lstm_hidden_size // 2, num_classes)

    def forward(self, x):
        """Classify a batch of frame sequences.

        Args:
            x: Tensor of shape [B, T, C, H, W] (batch, frames, channels, height, width).

        Returns:
            Logits of shape [B, num_classes].
        """
        batch_size, seq_len, channels, height, width = x.shape

        # Fold time into the batch axis so the CNN sees [B*T, C, H, W],
        # producing per-frame features of shape [B*T, cnn_feature_dim].
        frame_features = self.feature_extractor(
            x.view(batch_size * seq_len, channels, height, width)
        )

        # Restore the sequence axis: [B, T, cnn_feature_dim].
        sequence = frame_features.view(batch_size, seq_len, -1)

        # Bi-LSTM over the frame embeddings; the final hidden/cell states
        # are not needed here.
        lstm_out, _ = self.lstm(sequence)

        # Classify from the last time step ([B, 2 * lstm_hidden_size]).
        final_step = lstm_out[:, -1, :]
        hidden = self.dropout(self.relu(self.fc1(final_step)))
        return self.fc2(hidden)
|
requirements.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- Backend API ---
|
| 2 |
+
fastapi==0.111.0
|
| 3 |
+
uvicorn==0.30.1
|
| 4 |
+
python-multipart==0.0.9
|
| 5 |
+
|
| 6 |
+
# --- Core ML Libraries (Stable for Py 3.11) ---
|
| 7 |
+
--extra-index-url https://download.pytorch.org/whl/cpu
torch
torchvision
|
| 9 |
+
|
| 10 |
+
# --- Face Detection & Processing ---
|
| 11 |
+
facenet-pytorch==2.5.3
|
| 12 |
+
opencv-python-headless==4.10.0.84
|
| 13 |
+
numpy==1.26.4
|
| 14 |
+
pandas==2.2.2
|
| 15 |
+
scikit-learn==1.5.1
|
| 16 |
+
matplotlib==3.8.2
|
| 17 |
+
|
| 18 |
+
# --- AI Text Detection (ModernBERT requires >= 4.48.0) ---
|
| 19 |
+
transformers>=4.48.0
|
| 20 |
+
huggingface-hub>=0.20.0
|
| 21 |
+
accelerate>=0.26.0
|
slop_detector.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI Text Detector Integration Module
|
| 3 |
+
|
| 4 |
+
This module integrates the slop-detector-bert model from Hugging Face
|
| 5 |
+
for detecting AI-generated text content. It can be used in combination
|
| 6 |
+
with the deepfake video detector for multi-modal analysis.
|
| 7 |
+
|
| 8 |
+
Model: gouwsxander/slop-detector-bert
|
| 9 |
+
- BERT-based classifier for detecting AI-generated text
|
| 10 |
+
- Trained on Wikipedia human-written vs AI-rewritten paragraphs
|
| 11 |
+
- Labels: LABEL_0 (HUMAN), LABEL_1 (AI)
|
| 12 |
+
- This is a PEFT/LoRA adapter on bert-base-cased
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import torch
|
| 16 |
+
import torch.nn.functional as F
|
| 17 |
+
from typing import Optional
|
| 18 |
+
from dataclasses import dataclass
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass
class SlopDetectionResult:
    """Result from AI text detection.

    Produced by SlopDetector.detect; empty input yields label "UNKNOWN"
    with confidence 0.0.
    """
    # The analyzed text (SlopDetector.detect truncates it to 500 chars + "...").
    text: str
    label: str  # "HUMAN" or "AI"
    # Winning-class probability expressed as a percentage (0-100).
    confidence: float
    # Convenience flag: True when the predicted class is AI (LABEL_1).
    is_ai_generated: bool
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class SlopDetector:
    """Lazy wrapper around a ModernBERT sequence classifier for AI-text detection."""

    # Using the requested ModernBERT model.
    # Note: ModernBERT requires transformers >= 4.48.0.
    MODEL_NAME = "AICodexLab/answerdotai-ModernBERT-base-ai-detector"

    def __init__(self, device: Optional[str] = None):
        """Choose a device (CUDA when available) but defer model loading."""
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self._model = None
        self._tokenizer = None
        self._loaded = False

    def load_model(self) -> None:
        """Lazily load the model from Hugging Face."""
        if self._loaded:
            return

        try:
            from transformers import AutoTokenizer, AutoModelForSequenceClassification

            print(f"Loading ModernBERT detector on {self.device}...")

            self._tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)

            # Model goes straight to the target device in eval mode
            # (Module.eval() returns the module, so the chain is safe).
            self._model = AutoModelForSequenceClassification.from_pretrained(
                self.MODEL_NAME,
                num_labels=2,
                trust_remote_code=True,
            ).to(self.device).eval()

            self._loaded = True
            print("ModernBERT detector loaded successfully!")

        except Exception as e:
            print(f"Error loading ModernBERT detector: {e}")
            print("Tip: Ensure you have transformers>=4.48.0 installed.")
            raise

    def detect(self, text: str) -> SlopDetectionResult:
        """Classify a single text as human- or AI-written."""
        self.load_model()

        # Blank input: report UNKNOWN instead of running the model.
        if not text or not text.strip():
            return SlopDetectionResult(
                text=text,
                label="UNKNOWN",
                confidence=0.0,
                is_ai_generated=False,
            )

        # Tokenize with truncation to the model's 512-token window.
        encoded = self._tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
            padding=True,
        )
        encoded = {name: tensor.to(self.device) for name, tensor in encoded.items()}

        with torch.no_grad():
            probs = F.softmax(self._model(**encoded).logits, dim=-1)

        top_class = probs.argmax().item()
        score = probs[0, top_class].item() * 100

        # LABEL_1 = AI, LABEL_0 = HUMAN
        ai_flag = top_class == 1

        return SlopDetectionResult(
            text=text[:500] + "..." if len(text) > 500 else text,
            label="AI" if ai_flag else "HUMAN",
            confidence=score,
            is_ai_generated=ai_flag,
        )

    def detect_batch(self, texts: list[str]) -> list[SlopDetectionResult]:
        """Run detect() over each text, preserving order."""
        self.load_model()
        return [self.detect(t) for t in texts]

    def analyze_paragraphs(self, full_text: str) -> dict:
        """Paragraph-level analysis with a majority-vote overall verdict."""
        self.load_model()

        # A paragraph is a newline-separated chunk longer than 20 characters.
        paragraphs = [chunk.strip() for chunk in full_text.split('\n') if len(chunk.strip()) > 20]

        if not paragraphs:
            return {
                "overall_label": "UNKNOWN",
                "overall_confidence": 0.0,
                "ai_probability": 0.0,
                "paragraph_count": 0,
                "ai_paragraph_count": 0,
                "details": [],
            }

        results = self.detect_batch(paragraphs)

        ai_hits = [r for r in results if r.is_ai_generated]
        human_hits = [r for r in results if not r.is_ai_generated]
        ai_count = len(ai_hits)

        # Share of paragraphs flagged as AI, as a percentage.
        ai_probability = (ai_count / len(paragraphs)) * 100

        # Majority vote; overall confidence is the mean over the winning side.
        if ai_count > len(paragraphs) / 2:
            overall_label = "AI"
            winning = [r.confidence for r in ai_hits]
        else:
            overall_label = "HUMAN"
            winning = [r.confidence for r in human_hits]
        overall_confidence = sum(winning) / len(winning) if winning else 0

        return {
            "overall_label": overall_label,
            "overall_confidence": round(overall_confidence, 2),
            "ai_probability": round(ai_probability, 2),
            "paragraph_count": len(paragraphs),
            "ai_paragraph_count": ai_count,
            "details": [
                {
                    "paragraph_preview": r.text[:100] + "..." if len(r.text) > 100 else r.text,
                    "label": r.label,
                    "confidence": round(r.confidence, 2),
                }
                for r in results
            ],
        }
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
# Singleton instance for easy import
|
| 176 |
+
_detector_instance: Optional[SlopDetector] = None
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def get_slop_detector() -> SlopDetector:
    """Get or create the singleton SlopDetector instance.

    The instance is created lazily; SlopDetector.load_model is itself lazy,
    so no model weights are fetched until the first detection call.
    """
    global _detector_instance
    if _detector_instance is None:
        _detector_instance = SlopDetector()
    return _detector_instance
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def detect_ai_text(text: str) -> SlopDetectionResult:
    """Classify ``text`` with the shared module-level detector."""
    return get_slop_detector().detect(text)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def analyze_text_content(text: str) -> dict:
    """Run paragraph-level analysis of ``text`` via the shared detector."""
    return get_slop_detector().analyze_paragraphs(text)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# Example usage and testing
|
| 200 |
+
if __name__ == "__main__":
    # Manual smoke test: two rewrites of the same facts, one human-styled
    # and one AI-styled, run through the detector.
    test_texts = [
        # Human-like text (original Wikipedia style)
        "Born in Bristol and raised in Glastonbury to an English father and Belgian mother, "
        "Norris began competitive kart racing aged eight. After a successful karting career, "
        "which culminated in his victory at the direct-drive World Championship in 2014, "
        "Norris graduated to junior formulae.",

        # AI-like text (more polished/structured)
        "Born in Bristol and raised in Glastonbury to an English father and a Belgian mother, "
        "Norris began competing in karting at the age of eight. He enjoyed a successful karting "
        "career, culminating in his victory at the direct-drive World Championship in 2014, "
        "before progressing into the junior single-seater categories.",
    ]

    banner = "=" * 60
    print(banner)
    print("AI Text Detection Test")
    print(banner)

    detector = SlopDetector()

    for index, sample in enumerate(test_texts, 1):
        verdict = detector.detect(sample)
        print(f"\nText {index}:")
        print(f" Preview: {sample[:80]}...")
        print(f" Label: {verdict.label}")
        print(f" Confidence: {verdict.confidence:.2f}%")
        print(f" Is AI Generated: {verdict.is_ai_generated}")
|