Shreesha1 commited on
Commit
b5fabfd
·
verified ·
1 Parent(s): 29c2819
Files changed (6) hide show
  1. Dockerfile +24 -0
  2. app.py +488 -0
  3. dataset.py +230 -0
  4. model.py +110 -0
  5. requirements.txt +21 -0
  6. slop_detector.py +228 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Debian-based Python image keeps the final container small.
FROM python:3.11-slim-bookworm

# System deps for OpenCV, etc. (libgl1 / libglib2.0-0 are runtime libraries
# cv2 links against on Debian).
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python deps first so Docker layer caching survives code-only edits.
COPY requirements.txt .
RUN pip install --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Copy code + models
COPY . .

# Cloud Run will set PORT env; default to 8080 if not set
ENV PORT=8080
EXPOSE 8080

# Use sh -c so ${PORT} is expanded by the shell
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT:-8080}"]
app.py ADDED
@@ -0,0 +1,488 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Form
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from typing import Optional
5
+ from contextlib import asynccontextmanager
6
+ import torch
7
+ import os
8
+ import shutil
9
+ import tempfile
10
+ import torch.nn.functional as F
11
+ from pathlib import Path
12
+
13
+ from model import DeepfakeDetector, FeatureExtractor
14
+ from dataset import extract_frames_from_video, process_image
15
+ from slop_detector import SlopDetector, detect_ai_text, analyze_text_content
16
+
17
+ BASE_DIR = Path(__file__).resolve().parent
18
+ SEQUENCE_LENGTH = 10
19
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
+
21
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: eagerly warm both detectors at startup.

    A load failure here is logged but never fatal — the endpoints fall
    back to lazy loading on first request.
    """
    print("Startup: Pre-loading default models to avoid delay...")
    try:
        load_model_if_needed()           # video deepfake detector
        load_slop_detector_if_needed()   # AI-text detector
        print("Startup: All models loaded and ready!")
    except Exception as e:
        print(f"Startup Warning: Could not pre-load models: {e}")

    yield

    # --- Shutdown (Cleanup if needed) ---
    print("Shutdown: Cleaning up...")
40
app = FastAPI(lifespan=lifespan)

# Exact origins allowed to call this API from a browser. Note: no trailing
# slash — browsers send the Origin header without one and CORS matching is
# exact-string, so "https://x.app/" would never match.
allowed_origins = [
    "http://localhost:5173",  # local vite
    "http://localhost:8080",  # if you're using that
    "https://deepfake-detection-lime.vercel.app",  # deployed frontend
]
# Bug fix: the original passed allow_origins=["*"] and never used the
# allowed_origins list above. A "*" wildcard together with
# allow_credentials=True is forbidden by the CORS spec (browsers reject
# the response), so the explicit allow-list is used instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
54
+
55
+ # --- Model Paths ---
56
+ SAVED_MODEL_PATH = BASE_DIR / "saved_models" / "deepfake_detector_best.pth"
57
+
58
+ model = None
59
+ feature_dim = None
60
+ model_error: str | None = None
61
+
62
+ # Slop detector for AI text detection
63
+ slop_detector = None
64
+ slop_detector_error: str | None = None
65
+
66
+
67
# Pydantic models for request/response
class TextAnalysisRequest(BaseModel):
    # Raw text to analyze for AI generation.
    text: str


class TextAnalysisResponse(BaseModel):
    # NOTE(review): declared but not currently attached as a response_model
    # to any endpoint — handlers return plain dicts. Kept as documentation
    # of the /analyze-text response shape.
    status: str
    label: str
    confidence: float
    is_ai_generated: bool
    details: Optional[dict] = None
78
+
79
+
80
def load_model_if_needed():
    """Load the video deepfake model once, on first use.

    On success sets module globals ``model`` and ``feature_dim``; on
    failure records the message in ``model_error`` (surfaced by /health)
    and leaves ``model`` as None. Safe to call repeatedly.
    """
    global model, feature_dim, model_error

    if model is not None:
        return  # already loaded

    print("Loading deepfake model lazily on first request...")
    try:
        # Instantiate the CNN once just to discover its output width, then
        # discard it (DeepfakeDetector builds its own extractor internally).
        temp_cnn = FeatureExtractor(freeze=True)
        feature_dim_local = temp_cnn.feature_dim
        del temp_cnn

        m = DeepfakeDetector(
            cnn_feature_dim=feature_dim_local,
            lstm_hidden_size=512,
            lstm_layers=2,
        ).to(DEVICE)

        # SAVED_MODEL_PATH is a pathlib.Path; use its own exists().
        if not SAVED_MODEL_PATH.exists():
            err = f"Model file not found at: {SAVED_MODEL_PATH}"
            print("Error:", err)
            model_error = err
            return

        state = torch.load(SAVED_MODEL_PATH, map_location=DEVICE)
        m.load_state_dict(state)
        m.eval()  # inference mode: disables dropout

        # The `global` declaration above makes plain assignment sufficient;
        # the original's extra globals()[...] writes were redundant.
        model_error = None
        model = m
        feature_dim = feature_dim_local

        print("Model loaded successfully!")
    except Exception as e:
        model_error = str(e)
        print(f"Error loading model: {e}")
117
+
118
+
119
def load_slop_detector_if_needed():
    """Load the AI-text ("slop") detector once, on first use.

    Mirrors load_model_if_needed(): success populates ``slop_detector``,
    failure records the message in ``slop_detector_error``.
    """
    global slop_detector, slop_detector_error

    if slop_detector is not None:
        return  # already loaded

    print("Loading slop detector for AI text detection...")
    try:
        detector = SlopDetector(device=str(DEVICE))
        detector.load_model()

        # The `global` declaration makes plain assignment sufficient; the
        # original's globals()[...] write was redundant.
        slop_detector_error = None
        slop_detector = detector

        print("Slop detector loaded successfully!")
    except Exception as e:
        slop_detector_error = str(e)
        print(f"Error loading slop detector: {e}")
137
+
138
+
139
@app.get("/")
def root():
    """Liveness banner for the API root."""
    return {"message": "Deepfake detector backend running"}
142
+
143
+
144
@app.get("/health")
def health():
    """Report the load state of both models plus an overall rollup."""

    def _state(err, instance):
        # Per-model status: a recorded error wins over "not loaded yet".
        if err is not None:
            return {"status": "error", "detail": err}
        if instance is None:
            return {"status": "not_loaded_yet"}
        return {"status": "ok"}

    status_info = {
        "deepfake_model": _state(model_error, model),
        "slop_detector": _state(slop_detector_error, slop_detector),
    }

    if model_error or slop_detector_error:
        overall_status = "partial_error"
    elif model is None and slop_detector is None:
        overall_status = "models_not_loaded_yet"
    else:
        overall_status = "ok"

    return {"status": overall_status, "models": status_info}
171
+
172
+
173
@app.post("/predict")
async def predict_video(file: UploadFile = File(...)):
    """Classify an uploaded video as REAL or FAKE.

    The upload is spooled to a temp file (OpenCV needs a real path),
    faces are sampled into a fixed-length frame sequence, and the
    CNN+LSTM model scores the sequence.
    """
    # Lazy load model on first request
    load_model_if_needed()

    if model is None:
        # loading failed
        raise HTTPException(
            status_code=503,
            detail=f"Model not available on server. Error: {model_error}",
        )

    # file.filename may be None for some clients; treat that as invalid
    # instead of raising AttributeError.
    if not (file.filename or "").lower().endswith((".mp4", ".mov", ".avi")):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload .mp4, .mov, or .avi",
        )

    # Save uploaded file to temp path
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        shutil.copyfileobj(file.file, temp_file)
        temp_file_path = temp_file.name

    try:
        frames_tensor = extract_frames_from_video(
            video_path=temp_file_path,
            sequence_length=SEQUENCE_LENGTH,
        )

        if frames_tensor is None:
            return {
                "status": "error",
                "message": "Could not detect a face in the video.",
            }

        # Add the batch dimension: [T, C, H, W] -> [1, T, C, H, W]
        frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            output = model(frames_tensor)
            probabilities = F.softmax(output, dim=1)
            confidence, predicted_class = torch.max(probabilities, 1)

        prediction_idx = predicted_class.item()
        conf_score = confidence.item() * 100
        result_label = "FAKE" if prediction_idx == 1 else "REAL"

        return {
            "status": "success",
            "filename": file.filename,
            "prediction": result_label,
            "confidence": round(conf_score, 2),
            "is_fake": prediction_idx == 1,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Guard the removal (consistent with /analyze-image) so cleanup can
        # never mask the real error with a FileNotFoundError.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
230
+
231
+
232
@app.post("/analyze-image")
async def analyze_image(file: UploadFile = File(...)):
    """Classify a single uploaded image as REAL or FAKE.

    The image is expanded into a pseudo-video (the same face crop
    repeated SEQUENCE_LENGTH times) so the sequence model can score it.
    """
    load_model_if_needed()

    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"Model not available on server. Error: {model_error}",
        )

    allowed_exts = (".jpg", ".jpeg", ".png", ".webp")
    if not file.filename.lower().endswith(allowed_exts):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload .jpg, .jpeg, .png, or .webp",
        )

    # Spool the upload to disk so OpenCV can read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
        shutil.copyfileobj(file.file, tmp)
        temp_file_path = tmp.name

    try:
        # process_image returns [SEQUENCE_LENGTH, 3, 224, 224] — the image
        # repeated into a static "video" — or None when no face is found.
        frames_tensor = process_image(
            image_path=temp_file_path,
            sequence_length=SEQUENCE_LENGTH,
        )

        if frames_tensor is None:
            return {
                "status": "error",
                "message": "Could not detect a face in the image.",
            }

        batch = frames_tensor.unsqueeze(0).to(DEVICE)

        with torch.no_grad():
            logits = model(batch)
            probs = F.softmax(logits, dim=1)
            confidence, predicted_class = torch.max(probs, 1)

        idx = predicted_class.item()
        score = confidence.item() * 100

        return {
            "status": "success",
            "filename": file.filename,
            "prediction": "FAKE" if idx == 1 else "REAL",
            "confidence": round(score, 2),
            "is_fake": idx == 1,
            "type": "image_analysis"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
293
+
294
+
295
@app.post("/analyze-text")
async def analyze_text(request: TextAnalysisRequest):
    """Score a block of text as HUMAN- or AI-written."""
    load_slop_detector_if_needed()

    if slop_detector is None:
        raise HTTPException(
            status_code=503,
            detail=f"Slop detector not available. Error: {slop_detector_error}",
        )

    try:
        verdict = slop_detector.detect(request.text)
        return {
            "status": "success",
            "label": verdict.label,
            "confidence": round(verdict.confidence, 2),
            "is_ai_generated": verdict.is_ai_generated,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
316
+
317
+
318
@app.post("/analyze-text-detailed")
async def analyze_text_detailed(request: TextAnalysisRequest):
    """Per-paragraph AI-text breakdown for a block of text."""
    load_slop_detector_if_needed()

    if slop_detector is None:
        raise HTTPException(
            status_code=503,
            detail=f"Slop detector not available. Error: {slop_detector_error}",
        )

    try:
        report = slop_detector.analyze_paragraphs(request.text)
        # Flatten the analysis dict into the response alongside "status".
        return {"status": "success", **report}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
337
+
338
+
339
@app.post("/predict-combined")
async def predict_combined(
    file: UploadFile = File(...),
    context_text: Optional[str] = Form(None),
):
    """Multi-modal check: deepfake-score the video and, when context text
    is supplied, AI-score the text too, then fuse both into one verdict.

    Returns per-modality results plus the combined verdict produced by
    determine_combined_verdict(). A failed video leg (no face found) does
    not abort the request — the text leg and combined verdict still run.
    """
    # Load both models
    load_model_if_needed()

    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"Deepfake model not available. Error: {model_error}",
        )

    if not file.filename.lower().endswith((".mp4", ".mov", ".avi")):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload .mp4, .mov, or .avi",
        )

    # Save uploaded file to temp path (OpenCV needs a real file on disk).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
        shutil.copyfileobj(file.file, temp_file)
        temp_file_path = temp_file.name

    try:
        # --- Video Deepfake Detection ---
        frames_tensor = extract_frames_from_video(
            video_path=temp_file_path,
            sequence_length=SEQUENCE_LENGTH,
        )

        if frames_tensor is None:
            # No face found: record the video leg as failed but continue.
            video_result = {
                "status": "error",
                "message": "Could not detect a face in the video.",
                "prediction": None,
                "confidence": None,
                "is_fake": None,
            }
        else:
            # Add batch dim: [T, C, H, W] -> [1, T, C, H, W]
            frames_tensor = frames_tensor.unsqueeze(0).to(DEVICE)

            with torch.no_grad():
                output = model(frames_tensor)
                probabilities = F.softmax(output, dim=1)
                confidence, predicted_class = torch.max(probabilities, 1)

            prediction_idx = predicted_class.item()
            conf_score = confidence.item() * 100
            result_label = "FAKE" if prediction_idx == 1 else "REAL"

            video_result = {
                "status": "success",
                "prediction": result_label,
                "confidence": round(conf_score, 2),
                "is_fake": prediction_idx == 1,
            }

        # --- Text Context Analysis (if provided) ---
        text_result = None
        if context_text and context_text.strip():
            load_slop_detector_if_needed()

            if slop_detector is not None:
                text_analysis = slop_detector.analyze_paragraphs(context_text)
                text_result = {
                    "status": "success",
                    "overall_label": text_analysis["overall_label"],
                    "overall_confidence": text_analysis["overall_confidence"],
                    "ai_probability": text_analysis["ai_probability"],
                    "paragraph_count": text_analysis["paragraph_count"],
                    "ai_paragraph_count": text_analysis["ai_paragraph_count"],
                }
            else:
                # Text leg degrades gracefully if the detector failed to load.
                text_result = {
                    "status": "error",
                    "message": f"Slop detector not available: {slop_detector_error}"
                }

        # --- Combined Assessment ---
        combined_verdict = determine_combined_verdict(video_result, text_result)

        return {
            "status": "success",
            "filename": file.filename,
            "video_analysis": video_result,
            "text_analysis": text_result,
            "combined_verdict": combined_verdict,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
435
+
436
+
437
def determine_combined_verdict(video_result: dict, text_result: Optional[dict]) -> dict:
    """Fuse the video and text analyses into one verdict dict.

    Returns {"verdict", "severity", "explanation"}. ``text_result`` may be
    None (no context supplied) or an error dict; both count as "no usable
    text signal".
    """
    video_status = video_result.get("status")
    video_fake = video_result.get("is_fake")
    video_confidence = video_result.get("confidence", 0)

    # Extract the text signal only when that leg succeeded.
    has_text = text_result is not None and text_result.get("status") == "success"
    text_ai = (text_result.get("overall_label") == "AI") if has_text else None
    text_confidence = text_result.get("overall_confidence", 0) if has_text else None

    if video_status == "error":
        tail = ""
        if text_ai is not None:
            tail = f"Text appears {'AI-generated' if text_ai else 'human-written'}."
        return {
            "verdict": "INCONCLUSIVE",
            "severity": "unknown",
            "explanation": "Could not analyze video (no face detected). " + tail,
        }

    if video_fake:
        if text_ai:
            return {
                "verdict": "HIGH_RISK_DEEPFAKE",
                "severity": "high",
                "explanation": f"Video detected as FAKE ({video_confidence:.1f}% confidence) AND associated text appears AI-generated ({text_confidence:.1f}% confidence). This combination suggests sophisticated manipulation."
            }
        if text_ai is False:
            return {
                "verdict": "DEEPFAKE_DETECTED",
                "severity": "high",
                "explanation": f"Video detected as FAKE ({video_confidence:.1f}% confidence). Associated text appears human-written."
            }
        # text_ai is None: no text context was supplied.
        return {
            "verdict": "DEEPFAKE_DETECTED",
            "severity": "high",
            "explanation": f"Video detected as FAKE ({video_confidence:.1f}% confidence). No text context provided for additional analysis."
        }

    if text_ai:
        return {
            "verdict": "SUSPICIOUS_CONTEXT",
            "severity": "medium",
            "explanation": f"Video appears REAL ({video_confidence:.1f}% confidence), but associated text appears AI-generated ({text_confidence:.1f}% confidence). Context may be misleading."
        }

    tail = ""
    if text_ai is False:
        tail = f" Associated text appears human-written ({text_confidence:.1f}% confidence)."
    return {
        "verdict": "LIKELY_AUTHENTIC",
        "severity": "low",
        "explanation": f"Video appears REAL ({video_confidence:.1f}% confidence)." + tail,
    }
dataset.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ from torch.utils.data import Dataset
6
+ from torchvision import transforms
7
+ # Import Facenet-PyTorch for Face Detection (No TensorFlow needed)
8
+ from facenet_pytorch import MTCNN
9
+
10
# --- 1. CONFIGURATION ---
# 10 frames is enough for a resume project and runs faster on CPU
SEQUENCE_LENGTH_DEFAULT = 10
IMG_SIZE = 224
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# --- 2. INITIALIZE MTCNN ---
print(f"Initializing MTCNN on {DEVICE}...")
# keep_all=True makes detect() return every candidate face; the callers
# below sort the candidates by confidence and keep the best one themselves.
mtcnn_detector = MTCNN(keep_all=True, device=DEVICE)

# Standard ImageNet normalization, matching the pretrained ResNeXt backbone.
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
30
+
31
# --- 3. PREPROCESSING FUNCTION ---
def extract_frames_from_video(video_path, sequence_length=SEQUENCE_LENGTH_DEFAULT):
    """Sample `sequence_length` frames evenly from a video, crop the most
    confident face from each, and return them stacked as [T, 3, H, W].

    Returns None when the video cannot be read or no face is ever found.
    Frames whose detection fails are skipped; the last good crop is
    repeated to pad the sequence back to `sequence_length`.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None

    processed_frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames <= 0:
            # Bug fix: the original returned here without cap.release(),
            # leaking the underlying video handle. The finally below now
            # guarantees release on every path.
            return None

        # Evenly spaced frame indices across the whole clip.
        frame_indices = np.linspace(0, total_frames - 1, sequence_length, dtype=int)

        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue

            # Convert to RGB for MTCNN (OpenCV is BGR)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            try:
                # boxes given as [x1, y1, x2, y2]
                boxes, probs = mtcnn_detector.detect(frame_rgb)

                if boxes is None or len(boxes) == 0:
                    continue

                # Keep only detections with a real confidence score.
                face_list = [
                    {'box': box, 'conf': prob}
                    for box, prob in zip(boxes, probs)
                    if prob is not None
                ]
                if not face_list:
                    continue

                best_face = max(face_list, key=lambda f: f['conf'])
                x1, y1, x2, y2 = best_face['box']

                # Clamp to the image and convert floats to ints.
                x, y = max(0, int(x1)), max(0, int(y1))
                w, h = int(x2 - x1), int(y2 - y1)

                # Add padding (10%) around the detected box.
                pad_w = int(w * 0.1)
                pad_h = int(h * 0.1)

                img_h, img_w, _ = frame.shape
                y_min = max(0, y - pad_h)
                y_max = min(img_h, y + h + pad_h)
                x_min = max(0, x - pad_w)
                x_max = min(img_w, x + w + pad_w)

                # NOTE: the crop is taken from the original BGR frame,
                # matching the existing pipeline's behavior.
                face_crop = frame[y_min:y_max, x_min:x_max]

                if face_crop.size != 0:
                    processed_frames.append(data_transforms(face_crop))
            except Exception:
                # Skip frames whose detection/cropping fails.
                continue
    finally:
        cap.release()

    if not processed_frames:
        return None

    # Padding if we missed some frames due to detection failure
    while len(processed_frames) < sequence_length:
        processed_frames.append(processed_frames[-1])

    return torch.stack(processed_frames[:sequence_length])
111
+
112
+
113
# --- 3b. IMAGE PROCESSING FUNCTION ---
def process_image(image_path, sequence_length=SEQUENCE_LENGTH_DEFAULT):
    """Crop the best face from a still image and tile it into a pseudo
    video of `sequence_length` identical frames: [T, 3, 224, 224].

    Returns None when the file is unreadable or no face is detected.
    """
    try:
        frame = cv2.imread(image_path)
        if frame is None:
            return None

        # MTCNN expects RGB; OpenCV loads BGR.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        boxes, probs = mtcnn_detector.detect(rgb)
        if boxes is None or len(boxes) == 0:
            return None

        # Keep only detections that carry a confidence score.
        candidates = []
        for box, prob in zip(boxes, probs):
            if prob is not None:
                candidates.append({'box': box, 'conf': prob})
        if not candidates:
            return None

        best = sorted(candidates, key=lambda c: c['conf'], reverse=True)[0]
        x1, y1, x2, y2 = best['box']

        w = x2 - x1
        h = y2 - y1
        x = x1
        y = y1

        # Integer conversion and clamping to the image.
        x, y = max(0, int(x)), max(0, int(y))
        w, h = int(w), int(h)

        # 10% padding around the detected box.
        pad_w = int(w * 0.1)
        pad_h = int(h * 0.1)

        img_h, img_w, _ = frame.shape
        y_lo = max(0, y - pad_h)
        y_hi = min(img_h, y + h + pad_h)
        x_lo = max(0, x - pad_w)
        x_hi = min(img_w, x + w + pad_w)

        crop = frame[y_lo:y_hi, x_lo:x_hi]
        if crop.size == 0:
            return None

        single = data_transforms(crop)  # [3, 224, 224]

        # Repeat the single frame to fake a sequence.
        return single.unsqueeze(0).repeat(sequence_length, 1, 1, 1)

    except Exception as e:
        print(f"Error processing image: {e}")
        return None
170
+
171
+
172
# --- 4. DATASET CLASS ---
class DeepfakeDataset(Dataset):
    """Video dataset over `<data_dir>/real` and `<data_dir>/fake` folders.

    Labels: 0 = real, 1 = fake. Each item is a [T, 3, IMG_SIZE, IMG_SIZE]
    frame tensor plus its label; videos where no face is found yield a
    zero tensor with label -1 so the training loop can filter them out.
    """

    VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv')

    def __init__(self, data_dir, sequence_length=SEQUENCE_LENGTH_DEFAULT,
                 max_videos_per_class=400):
        """
        Args:
            data_dir: Root folder containing 'real' and 'fake' subfolders.
            sequence_length: Frames sampled per video.
            max_videos_per_class: Per-class cap (generalizes the previously
                hard-coded 400); pass None for no limit.
        """
        self.data_dir = data_dir
        self.sequence_length = sequence_length
        self.video_files = []
        self.labels = []

        print(f" Scanning for videos in {data_dir}...")

        # real -> label 0, fake -> label 1 (same order as before: real first)
        for subfolder, label in (('real', 0), ('fake', 1)):
            videos = self._find_videos(os.path.join(data_dir, subfolder))
            if max_videos_per_class is not None:
                videos = videos[:max_videos_per_class]
            self.video_files.extend(videos)
            self.labels.extend([label] * len(videos))

        self.total_videos = len(self.video_files)
        print(f" Total dataset size: {self.total_videos} videos")

    @classmethod
    def _find_videos(cls, folder_path):
        """Recursively collect video file paths under folder_path."""
        video_paths = []
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                if file.lower().endswith(cls.VIDEO_EXTS):
                    video_paths.append(os.path.join(root, file))
        return video_paths

    def __len__(self):
        return len(self.video_files)

    def __getitem__(self, idx):
        video_path = self.video_files[idx]
        label = self.labels[idx]

        frames = extract_frames_from_video(video_path, self.sequence_length)

        # Sentinel for "no face found": zero frames and label -1.
        if frames is None:
            return torch.zeros((self.sequence_length, 3, IMG_SIZE, IMG_SIZE)), -1

        return frames, torch.tensor(label, dtype=torch.long)
228
+
229
if __name__ == "__main__":
    # Smoke test: scan the default data directory and print its size.
    ds = DeepfakeDataset('data/')
model.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import models
4
+
5
class FeatureExtractor(nn.Module):
    """Frame-level spatial feature extractor built on a pretrained ResNeXt50.

    The backbone's classification head is replaced by Identity so the
    forward pass yields the pooled feature vector for each frame.
    """

    def __init__(self, freeze=True):
        super(FeatureExtractor, self).__init__()

        # ImageNet-pretrained ResNeXt50 via the new torchvision weights API.
        self.model = models.resnext50_32x4d(weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V2)

        if freeze:
            # Keep the backbone fixed; only downstream layers will train.
            for param in self.model.parameters():
                param.requires_grad = False

        # Width of the vector feeding the (about-to-be-removed) classifier.
        self.feature_dim = self.model.fc.in_features

        # Drop the 1000-class ImageNet head; Identity is a pass-through.
        self.model.fc = nn.Identity()

    def forward(self, x):
        """Map frames [B*T, C, H, W] to features [B*T, feature_dim]."""
        return self.model(x)
33
+
34
class DeepfakeDetector(nn.Module):
    """
    Combines the CNN extractor and LSTM sequencer to classify a video.
    """
    def __init__(self, cnn_feature_dim, lstm_hidden_size=512, lstm_layers=2, num_classes=2, dropout=0.5):
        """
        Args:
            cnn_feature_dim (int): The output dimension from our FeatureExtractor (e.g., 2048 for ResNeXt50)
            lstm_hidden_size (int): The number of features in the LSTM's hidden state.
            lstm_layers (int): The number of stacked LSTM layers.
            num_classes (int): The number of output classes (2: Real/Fake).
            dropout (float): Dropout probability for regularization.
        """
        super(DeepfakeDetector, self).__init__()

        # NOTE(review): the extractor is built internally while its output
        # width arrives separately as cnn_feature_dim — the caller must keep
        # the two consistent (app.py probes FeatureExtractor for the value).
        self.feature_extractor = FeatureExtractor(freeze=True)
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_layers = lstm_layers

        # --- Sequence Modeling (LSTM) ---
        # The LSTM will take the CNN features for each frame as input
        self.lstm = nn.LSTM(
            input_size=cnn_feature_dim,
            hidden_size=lstm_hidden_size,
            num_layers=lstm_layers,
            batch_first=True,  # Input shape is [BatchSize, SeqLength, Features]
            bidirectional=True,  # It will look at the sequence forwards and backwards
            dropout=dropout if lstm_layers > 1 else 0  # inter-layer dropout requires >1 layer
        )

        # --- Classification Head ---
        # We'll build a small classifier on top of the LSTM's output
        self.fc1 = nn.Linear(
            lstm_hidden_size * 2,  # * 2 because the LSTM is bidirectional
            lstm_hidden_size // 2
        )
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(lstm_hidden_size // 2, num_classes)  # Final output: 2 classes

    def forward(self, x):
        """Classify a batch of frame sequences.

        Args:
            x: Float tensor [B, T, C, H, W] of normalized face crops.
        Returns:
            Logits of shape [B, num_classes].
        """
        batch_size, seq_len, c, h, w = x.shape

        # --- 1. Feature Extraction (CNN) ---
        # Fold time into the batch so every frame goes through the CNN at once:
        # reshape [B, T, C, H, W] -> [B*T, C, H, W].
        x_flat = x.view(batch_size * seq_len, c, h, w)

        features = self.feature_extractor(x_flat)
        # 'features' now has shape [B * T, cnn_feature_dim]

        # --- 2. Sequence Modeling (LSTM) ---
        # Reshape features back into sequences: [B, T, cnn_feature_dim]
        features_seq = features.view(batch_size, seq_len, -1)

        # lstm_out shape: [B, T, 2 * lstm_hidden_size] (because bidirectional)
        # h_n, c_n are the final hidden/cell states, which we don't need here
        lstm_out, (h_n, c_n) = self.lstm(features_seq)

        # Use the output from the *last* time step for classification.
        # NOTE(review): for the backward direction, index -1 is that
        # direction's *first* step; concatenating the final hidden states of
        # both directions would be the conventional alternative — left
        # unchanged to stay compatible with the trained weights.
        last_time_step_out = lstm_out[:, -1, :]
        # Shape is now [B, 2 * lstm_hidden_size]

        # --- 3. Classification ---
        # Pass the LSTM's final output through our classifier
        x = self.dropout(self.relu(self.fc1(last_time_step_out)))
        out = self.fc2(x)
        # 'out' shape: [B, num_classes] (e.g., [8, 2])

        return out
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --- Backend API ---
2
+ fastapi==0.111.0
3
+ uvicorn==0.30.1
4
+ python-multipart==0.0.9
5
+
6
+ # --- Core ML Libraries (Stable for Py 3.11) ---
7
+ # pip does not support --index-url appended to an individual requirement;
+ # it must appear on its own line. --extra-index-url keeps PyPI available
+ # for the remaining packages while pulling CPU wheels for torch.
+ --extra-index-url https://download.pytorch.org/whl/cpu
+ torch
+ torchvision
9
+
10
+ # --- Face Detection & Processing ---
11
+ facenet-pytorch==2.5.3
12
+ opencv-python-headless==4.10.0.84
13
+ numpy==1.26.4
14
+ pandas==2.2.2
15
+ scikit-learn==1.5.1
16
+ matplotlib==3.8.2
17
+
18
+ # --- AI Text Detection (ModernBERT requires >= 4.48.0) ---
19
+ transformers>=4.48.0
20
+ huggingface-hub>=0.20.0
21
+ accelerate>=0.26.0
slop_detector.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Text Detector Integration Module
3
+
4
+ This module integrates the slop-detector-bert model from Hugging Face
5
+ for detecting AI-generated text content. It can be used in combination
6
+ with the deepfake video detector for multi-modal analysis.
7
+
8
+ Model: gouwsxander/slop-detector-bert
9
+ - BERT-based classifier for detecting AI-generated text
10
+ - Trained on Wikipedia human-written vs AI-rewritten paragraphs
11
+ - Labels: LABEL_0 (HUMAN), LABEL_1 (AI)
12
+ - This is a PEFT/LoRA adapter on bert-base-cased
13
+ """
14
+
15
+ import torch
16
+ import torch.nn.functional as F
17
+ from typing import Optional
18
+ from dataclasses import dataclass
19
+
20
+
21
@dataclass
class SlopDetectionResult:
    """Result from AI text detection.

    Produced by SlopDetector.detect(); confidence is a percentage in [0, 100].
    """

    # Text that was classified (detect() stores at most 500 chars plus "...").
    text: str
    # Predicted class: "HUMAN", "AI", or "UNKNOWN" for empty/whitespace input.
    label: str  # "HUMAN" or "AI"
    # Probability of the predicted class, scaled to a percentage.
    confidence: float
    # Convenience flag: True exactly when label == "AI" (class id 1).
    is_ai_generated: bool
28
+
29
+
30
+ class SlopDetector:
31
+
32
+ # Using the requested ModernBERT model
33
+ # Note: ModernBERT requires transformers >= 4.48.0
34
+ MODEL_NAME = "AICodexLab/answerdotai-ModernBERT-base-ai-detector"
35
+
36
+ def __init__(self, device: Optional[str] = None):
37
+
38
+ self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
39
+ self._model = None
40
+ self._tokenizer = None
41
+ self._loaded = False
42
+
43
+ def load_model(self) -> None:
44
+ """Lazily load the model from Hugging Face."""
45
+ if self._loaded:
46
+ return
47
+
48
+ try:
49
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
50
+
51
+ print(f"Loading ModernBERT detector on {self.device}...")
52
+
53
+ # Load tokenizer
54
+ self._tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
55
+
56
+ # Load model
57
+ self._model = AutoModelForSequenceClassification.from_pretrained(
58
+ self.MODEL_NAME,
59
+ num_labels=2,
60
+ trust_remote_code=True
61
+ )
62
+ self._model = self._model.to(self.device)
63
+ self._model.eval()
64
+
65
+ self._loaded = True
66
+ print("ModernBERT detector loaded successfully!")
67
+
68
+ except Exception as e:
69
+ print(f"Error loading ModernBERT detector: {e}")
70
+ print("Tip: Ensure you have transformers>=4.48.0 installed.")
71
+ raise
72
+
73
+ def detect(self, text: str) -> SlopDetectionResult:
74
+
75
+ self.load_model()
76
+
77
+ if not text or not text.strip():
78
+ return SlopDetectionResult(
79
+ text=text,
80
+ label="UNKNOWN",
81
+ confidence=0.0,
82
+ is_ai_generated=False
83
+ )
84
+
85
+ # Tokenize with truncation
86
+ inputs = self._tokenizer(
87
+ text,
88
+ return_tensors="pt",
89
+ max_length=512,
90
+ truncation=True,
91
+ padding=True
92
+ )
93
+ inputs = {k: v.to(self.device) for k, v in inputs.items()}
94
+
95
+ with torch.no_grad():
96
+ outputs = self._model(**inputs)
97
+ probabilities = F.softmax(outputs.logits, dim=-1)
98
+
99
+ # Get prediction
100
+ predicted_class_id = probabilities.argmax().item()
101
+ confidence = probabilities[0, predicted_class_id].item() * 100
102
+
103
+ # LABEL_1 = AI, LABEL_0 = HUMAN
104
+ is_ai = predicted_class_id == 1
105
+ label = "AI" if is_ai else "HUMAN"
106
+
107
+ return SlopDetectionResult(
108
+ text=text[:500] + "..." if len(text) > 500 else text,
109
+ label=label,
110
+ confidence=confidence,
111
+ is_ai_generated=is_ai
112
+ )
113
+
114
+ def detect_batch(self, texts: list[str]) -> list[SlopDetectionResult]:
115
+ self.load_model()
116
+
117
+ results = []
118
+ for text in texts:
119
+ results.append(self.detect(text))
120
+
121
+ return results
122
+
123
+ def analyze_paragraphs(self, full_text: str) -> dict:
124
+ self.load_model()
125
+
126
+ # Split into paragraphs
127
+ paragraphs = [p.strip() for p in full_text.split('\n') if len(p.strip()) > 20]
128
+
129
+ if not paragraphs:
130
+ return {
131
+ "overall_label": "UNKNOWN",
132
+ "overall_confidence": 0.0,
133
+ "ai_probability": 0.0,
134
+ "paragraph_count": 0,
135
+ "ai_paragraph_count": 0,
136
+ "details": []
137
+ }
138
+
139
+ # Analyze each paragraph
140
+ paragraph_results = self.detect_batch(paragraphs)
141
+
142
+ # Calculate aggregate metrics
143
+ ai_count = sum(1 for r in paragraph_results if r.is_ai_generated)
144
+ ai_confidences = [r.confidence for r in paragraph_results if r.is_ai_generated]
145
+ human_confidences = [r.confidence for r in paragraph_results if not r.is_ai_generated]
146
+
147
+ # Overall probability based on paragraph analysis
148
+ ai_probability = (ai_count / len(paragraphs)) * 100
149
+
150
+ # Determine overall label (majority vote with confidence weighting)
151
+ if ai_count > len(paragraphs) / 2:
152
+ overall_label = "AI"
153
+ overall_confidence = sum(ai_confidences) / len(ai_confidences) if ai_confidences else 0
154
+ else:
155
+ overall_label = "HUMAN"
156
+ overall_confidence = sum(human_confidences) / len(human_confidences) if human_confidences else 0
157
+
158
+ return {
159
+ "overall_label": overall_label,
160
+ "overall_confidence": round(overall_confidence, 2),
161
+ "ai_probability": round(ai_probability, 2),
162
+ "paragraph_count": len(paragraphs),
163
+ "ai_paragraph_count": ai_count,
164
+ "details": [
165
+ {
166
+ "paragraph_preview": r.text[:100] + "..." if len(r.text) > 100 else r.text,
167
+ "label": r.label,
168
+ "confidence": round(r.confidence, 2)
169
+ }
170
+ for r in paragraph_results
171
+ ]
172
+ }
173
+
174
+
175
# Module-level singleton so every caller shares one loaded model.
_detector_instance: Optional[SlopDetector] = None


def get_slop_detector() -> SlopDetector:
    """Return the shared SlopDetector, constructing it lazily on first call."""
    global _detector_instance
    _detector_instance = _detector_instance or SlopDetector()
    return _detector_instance
185
+
186
+
187
def detect_ai_text(text: str) -> SlopDetectionResult:
    """Classify *text* with the shared detector; see SlopDetector.detect."""
    return get_slop_detector().detect(text)
191
+
192
+
193
def analyze_text_content(text: str) -> dict:
    """Run paragraph-level AI analysis of *text* with the shared detector."""
    return get_slop_detector().analyze_paragraphs(text)
197
+
198
+
199
# Quick manual smoke test: run this module directly to compare the detector's
# verdict on a human-written passage vs an AI-polished rewrite of it.
if __name__ == "__main__":
    human_sample = (
        "Born in Bristol and raised in Glastonbury to an English father and Belgian mother, "
        "Norris began competitive kart racing aged eight. After a successful karting career, "
        "which culminated in his victory at the direct-drive World Championship in 2014, "
        "Norris graduated to junior formulae."
    )
    ai_sample = (
        "Born in Bristol and raised in Glastonbury to an English father and a Belgian mother, "
        "Norris began competing in karting at the age of eight. He enjoyed a successful karting "
        "career, culminating in his victory at the direct-drive World Championship in 2014, "
        "before progressing into the junior single-seater categories."
    )
    test_texts = [human_sample, ai_sample]

    banner = "=" * 60
    print(banner)
    print("AI Text Detection Test")
    print(banner)

    detector = SlopDetector()

    for index, sample in enumerate(test_texts, start=1):
        verdict = detector.detect(sample)
        print(f"\nText {index}:")
        print(f" Preview: {sample[:80]}...")
        print(f" Label: {verdict.label}")
        print(f" Confidence: {verdict.confidence:.2f}%")
        print(f" Is AI Generated: {verdict.is_ai_generated}")