Spaces:
Running
Running
Anish committed on
Commit ·
cc16b3b
1
Parent(s): 86c1923
[Feature Added] > Model Drift Detection mechanism. This feature checks whether the model is still confident when classifying newly uploaded images/videos as AI-generated or not; if it is not, the model needs to be retrained.
Browse files
backend/app/ai/drift_monitor.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sqlalchemy.orm import Session
|
| 4 |
+
from datetime import datetime, timedelta, UTC
|
| 5 |
+
from app.models.file_model import File
|
| 6 |
+
from scipy.stats import wasserstein_distance
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class DriftMonitor:
|
| 11 |
+
def __init__(self, db: Session):
|
| 12 |
+
self.db = db
|
| 13 |
+
self.CONFIDENCE_DROP_THRESHOLD = 0.15
|
| 14 |
+
self.DISTRIBUTION_DRIFT_THRESHOLD = 0.20
|
| 15 |
+
self.reference_days = 30
|
| 16 |
+
self.recent_days = 7
|
| 17 |
+
|
| 18 |
+
def _get_historical_data(self, days_ago_start: int, days_ago_end: int):
|
| 19 |
+
end_date = datetime.now(UTC) - timedelta(days=days_ago_end)
|
| 20 |
+
start_date = datetime.now(UTC) - timedelta(days=days_ago_start)
|
| 21 |
+
|
| 22 |
+
records = self.db.query(File.result, File.confidence).filter(
|
| 23 |
+
File.created_at >= start_date,
|
| 24 |
+
File.created_at <= end_date,
|
| 25 |
+
File.status == "Completed",
|
| 26 |
+
File.confidence != None
|
| 27 |
+
).all()
|
| 28 |
+
|
| 29 |
+
scores = [r.confidence for r in records]
|
| 30 |
+
classifications = [1 if "AI" in r.result or "Suspicious" in r.result else 0 for r in records]
|
| 31 |
+
|
| 32 |
+
return scores, classifications
|
| 33 |
+
|
| 34 |
+
def detect_drift(self):
|
| 35 |
+
ref_scores, ref_classes = self._get_historical_data(self.reference_days, self.recent_days)
|
| 36 |
+
recent_scores, recent_classes = self._get_historical_data(self.recent_days, 0)
|
| 37 |
+
|
| 38 |
+
if len(ref_scores) < 50 or len(recent_scores) < 20:
|
| 39 |
+
logger.info("Insufficient data to perform drink analysis.")
|
| 40 |
+
return False
|
| 41 |
+
|
| 42 |
+
ref_avg_conf = np.mean(ref_scores)
|
| 43 |
+
recent_avg_conf = np.mean(recent_scores)
|
| 44 |
+
|
| 45 |
+
confidence_drop = ref_avg_conf - recent_avg_conf
|
| 46 |
+
|
| 47 |
+
if confidence_drop > self.CONFIDENCE_DROP_THRESHOLD:
|
| 48 |
+
logger.warning(f"DRIFT ALERT: Average AI confidence dropped significantly! (Historical: {ref_avg_conf:.2f}, Recent: {recent_avg_conf:.2f})")
|
| 49 |
+
|
| 50 |
+
drift_distance = wasserstein_distance(ref_scores, recent_scores)
|
| 51 |
+
|
| 52 |
+
if drift_distance > self.DISTRIBUTION_DRIFT_THRESHOLD:
|
| 53 |
+
logger.warning(f"DRIFT ALERT: Huge shift in prediction patterns! Statistical distance: {drift_distance:.2f}")
|
| 54 |
+
|
| 55 |
+
ref_ai_ratio = sum(ref_classes) / len(ref_classes)
|
| 56 |
+
recent_ai_ratio = sum(recent_classes) / len(recent_classes)
|
| 57 |
+
|
| 58 |
+
if abs(ref_ai_ratio - recent_ai_ratio) > 0.30:
|
| 59 |
+
logger.warning(f"DRIFT ALERT: Abnormal spike/drop in AI classifications! (Historical Ratio: {ref_ai_ratio:.2f}, Recent: {recent_ai_ratio:.2f})")
|
| 60 |
+
|
| 61 |
+
return True
|