RishiXD committed on
Commit
dc085f2
·
verified ·
1 Parent(s): ebf3eb1

Upload 3 files

Browse files
Files changed (3) hide show
  1. explainer.py +140 -0
  2. image_detector.py +85 -0
  3. video_detector.py +171 -0
explainer.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # explainer.py
2
+ # Generates forensic explanations using HuggingFace chat router
3
+ # Falls back to a structured template if API fails
4
+
5
+ import os
6
+ import json
7
+ from openai import OpenAI
8
+ from dotenv import load_dotenv
9
+
10
+ load_dotenv()
11
+
12
+ _client = None
13
+
14
+
15
def _get_client() -> OpenAI:
    """Return the module-wide HF chat client, creating it on first use."""
    global _client
    if _client is not None:
        return _client

    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set in .env file")

    _client = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=token,
    )
    return _client
27
+
28
+
29
def explain_detection(detection: dict, input_type: str) -> dict:
    """
    Generate a three-audience forensic explanation for a detection result.

    Attempts the LLM first; any failure degrades to the structured
    template so callers always receive a dict with technical_signals,
    plain_english, manipulation_areas, recommended_action, and
    mitre_technique keys.
    """
    try:
        explanation = _call_llm(detection, input_type)
    except Exception as e:
        print(f" ⚠️ Explainer LLM failed ({e}), using fallback template.")
        explanation = _fallback(detection)
    return explanation
40
+
41
+
42
def _call_llm(detection: dict, input_type: str) -> dict:
    """
    Ask the HF-router-hosted model for a JSON forensic explanation.

    Builds a prompt from the detection's verdict/confidence/severity
    (plus frame-level stats when input_type == "video"), requests a
    JSON-only reply, strips any markdown code fences the model wraps
    around it, and parses the result with json.loads. Raises on any
    API or JSON error; the caller (explain_detection) handles fallback.
    """
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)
    severity = detection.get("severity", "LOW")
    extra = ""

    # Video detections carry frame-level stats worth surfacing to the LLM.
    if input_type == "video":
        extra = f"""
- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""

    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.

Detection result:
- Input type: {input_type}
- Verdict: {verdict}
- Confidence: {confidence}%
- Severity: {severity}{extra}

Return ONLY a valid JSON object — no markdown, no explanation, no extra text.

{{
"technical_signals": [
"specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
"specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
"specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
],
"plain_english": "2 clear sentences explaining this to a non-technical person.",
"manipulation_areas": ["facial region 1", "facial region 2"],
"recommended_action": "One specific action the user should take right now.",
"mitre_technique": "T1565.001 - Stored Data Manipulation"
}}

Rules:
- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
- Be specific. Never use generic phrases like "image looks suspicious"."""

    client = _get_client()
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3,  # Lower temp = more consistent JSON output
    )

    raw = completion.choices[0].message.content.strip()
    print(f" 🤖 Explainer raw output: {raw[:100]}...")

    # Strip markdown code fences if present: split on ``` and take the
    # first segment that looks like a JSON object.
    if "```" in raw:
        parts = raw.split("```")
        for part in parts:
            part = part.strip()
            if part.startswith("json"):
                # Drop the "json" language tag a fenced block may carry.
                part = part[4:].strip()
            if part.startswith("{"):
                raw = part
                break

    # May raise json.JSONDecodeError if the model ignored the JSON-only
    # instruction; explain_detection catches it and uses the fallback.
    return json.loads(raw)
104
+
105
+
106
+ def _fallback(detection: dict) -> dict:
107
+ """Structured fallback when LLM is unavailable."""
108
+ verdict = detection.get("verdict", "UNKNOWN")
109
+ confidence = detection.get("confidence", 0)
110
+
111
+ if verdict == "DEEPFAKE":
112
+ signals = [
113
+ f"Model confidence {confidence}% indicates high likelihood of synthetic generation",
114
+ "GAN-based artifacts detected in facial texture regions",
115
+ "Boundary blending inconsistencies identified near facial edges",
116
+ ]
117
+ plain = (
118
+ f"This content appears to be AI-generated or manipulated with {confidence}% confidence. "
119
+ "It shows technical patterns characteristic of deepfake generation tools."
120
+ )
121
+ action = "Do not share or use this content. Verify the original source independently."
122
+ else:
123
+ signals = [
124
+ f"Authenticity confidence: {confidence}%",
125
+ "Natural noise distribution consistent with real camera capture",
126
+ "No GAN fingerprint patterns detected",
127
+ ]
128
+ plain = (
129
+ f"This content appears authentic with {confidence}% confidence. "
130
+ "No deepfake manipulation signatures were detected."
131
+ )
132
+ action = "Content appears authentic. Standard verification still recommended for sensitive use cases."
133
+
134
+ return {
135
+ "technical_signals": signals,
136
+ "plain_english": plain,
137
+ "manipulation_areas": [],
138
+ "recommended_action": action,
139
+ "mitre_technique": "T1565.001 - Stored Data Manipulation",
140
+ }
image_detector.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # image_detector.py
2
+ # Uses Sightengine API — 2000 free requests/month, no credit card
3
+ # Accurate deepfake + AI-generated image detection
4
+
5
+ import requests
6
+ import os
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
+ SIGHTENGINE_USER = os.getenv("SIGHTENGINE_USER")
12
+ SIGHTENGINE_SECRET = os.getenv("SIGHTENGINE_SECRET")
13
+
14
+
15
class ImageDetector:
    """Image deepfake / AI-generation detector backed by the Sightengine API."""

    def __init__(self):
        # Fail fast if the API credentials were never configured.
        if not SIGHTENGINE_USER or not SIGHTENGINE_SECRET:
            raise RuntimeError(
                "SIGHTENGINE_USER and SIGHTENGINE_SECRET not set in .env\n"
                "Get free keys at: https://sightengine.com"
            )
        print("Image detector ready — Sightengine API.")

    def detect(self, image_path: str) -> dict:
        """Upload the image to Sightengine and return a parsed verdict dict."""
        payload = {
            "models": "deepfake,genai",  # deepfake + AI-generated
            "api_user": SIGHTENGINE_USER,
            "api_secret": SIGHTENGINE_SECRET,
        }
        with open(image_path, "rb") as media_file:
            response = requests.post(
                "https://api.sightengine.com/1.0/check.json",
                files={"media": media_file},
                data=payload,
                timeout=30,
            )

        result = response.json()
        print(f"Raw output: {result}")

        if result.get("status") != "success":
            raise RuntimeError(f"Sightengine error: {result}")

        return self._parse(result)

    def _parse(self, result: dict) -> dict:
        """Turn a raw Sightengine payload into SENTINEL's verdict dict."""
        # Sightengine scores are 0.0–1.0:
        #   deepfake.score    = face-swap probability
        #   type.ai_generated = AI-generated probability
        deepfake_score = result.get("deepfake", {}).get("score", 0.0)
        ai_gen_score = result.get("type", {}).get("ai_generated", 0.0)

        # The overall fake score is the worse of the two model outputs.
        fake_score = max(deepfake_score, ai_gen_score)
        real_score = 1.0 - fake_score
        is_fake = fake_score >= 0.5

        return {
            "verdict": "DEEPFAKE" if is_fake else "AUTHENTIC",
            "confidence": round(max(fake_score, real_score) * 100, 2),
            "fake_probability": round(fake_score * 100, 2),
            "real_probability": round(real_score * 100, 2),
            "severity": self._severity(fake_score),
            "deepfake_score": round(deepfake_score * 100, 2),
            "ai_generated_score": round(ai_gen_score * 100, 2),
            "model_used": "sightengine-deepfake-genai",
        }

    @staticmethod
    def _severity(score: float) -> str:
        """Map a 0–1 fake score onto a severity label."""
        for threshold, label in ((0.90, "CRITICAL"), (0.75, "HIGH"), (0.50, "MEDIUM")):
            if score >= threshold:
                return label
        return "LOW"
76
+
77
+
78
# Standalone test: python image_detector.py your_image.jpg
if __name__ == "__main__":
    import sys

    args = sys.argv
    if len(args) < 2:
        print("Usage: python image_detector.py <image_path>")
        sys.exit(1)
    print(ImageDetector().detect(args[1]))
video_detector.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # video_detector.py
2
+ # Primary: Sightengine video API
3
+ # Fallback: frame-by-frame using image detector
4
+
5
+ import requests
6
+ import os
7
+ import cv2
8
+ import uuid
9
+ import tempfile
10
+ from dotenv import load_dotenv
11
+ from image_detector import ImageDetector
12
+
13
+ load_dotenv()
14
+
15
+ SIGHTENGINE_USER = os.getenv("SIGHTENGINE_USER")
16
+ SIGHTENGINE_SECRET = os.getenv("SIGHTENGINE_SECRET")
17
+
18
+
19
class VideoDetector:
    """
    Deepfake detection for video files.

    Primary path: Sightengine's synchronous video-check API.
    Fallback path: sample frames with OpenCV, score each one through the
    shared ImageDetector, and aggregate the per-frame verdicts.
    """

    # String annotation so the class can be defined without evaluating the
    # ImageDetector name at class-creation time (PEP 484 forward reference).
    def __init__(self, image_detector: "ImageDetector"):
        # Kept for the frame-by-frame fallback path.
        self.image_detector = image_detector
        print("Video detector ready — Sightengine + frame fallback.")

    def detect(self, video_path: str) -> dict:
        """
        Analyze *video_path* and return a verdict dict.

        Tries the Sightengine video endpoint first; any failure there
        (network, quota, malformed response) degrades to local sampling.
        """
        try:
            return self._detect_via_sightengine(video_path)
        except Exception as e:
            print(f"Sightengine video failed ({e}), using frame-by-frame.")
            return self._detect_frame_by_frame(video_path)

    def _detect_via_sightengine(self, video_path: str) -> dict:
        """Upload the whole file to Sightengine's sync video endpoint."""
        with open(video_path, "rb") as f:
            response = requests.post(
                "https://api.sightengine.com/1.0/video/check-sync.json",
                files={"media": f},
                data={
                    "models": "deepfake,genai",
                    "api_user": SIGHTENGINE_USER,
                    "api_secret": SIGHTENGINE_SECRET,
                },
                timeout=120  # synchronous video analysis can take a while
            )

        result = response.json()
        print(f"Sightengine video raw: {result}")

        if result.get("status") != "success":
            raise RuntimeError(f"Sightengine error: {result}")

        return self._parse_video_result(result, video_path)

    def _parse_video_result(self, result: dict, video_path: str) -> dict:
        """Aggregate Sightengine's per-frame scores into a single verdict."""
        frames = result.get("data", {}).get("frames", [])

        if not frames:
            raise RuntimeError("No frames in Sightengine video response")

        # Per frame, take the worse of the two model scores (0.0–1.0).
        fake_scores = []
        for frame in frames:
            deepfake = frame.get("deepfake", {}).get("score", 0.0)
            ai_gen = frame.get("type", {}).get("ai_generated", 0.0)
            fake_scores.append(max(deepfake, ai_gen))

        avg_fake = sum(fake_scores) / len(fake_scores)
        fake_frames = sum(1 for s in fake_scores if s >= 0.5)
        duration = self._get_duration(video_path)

        return {
            "verdict": "DEEPFAKE" if avg_fake >= 0.5 else "AUTHENTIC",
            "confidence": round(max(avg_fake, 1 - avg_fake) * 100, 2),
            "fake_probability": round(avg_fake * 100, 2),
            "real_probability": round((1 - avg_fake) * 100, 2),
            "severity": self._severity(avg_fake),
            "detection_method": "sightengine_video",
            "model_used": "sightengine-deepfake-genai",
            "frames_analyzed": len(fake_scores),
            "fake_frames_count": fake_frames,
            "real_frames_count": len(fake_scores) - fake_frames,
            "duration_seconds": duration,
            "timeline": [],  # per-frame timeline only produced on the fallback path
        }

    def _detect_frame_by_frame(self, video_path: str, max_samples: int = 8) -> dict:
        """
        Fallback: sample up to *max_samples* evenly spaced frames, score
        each via the image detector, and aggregate the results.

        Raises RuntimeError if the video cannot be opened or no frame
        could be analyzed.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # guard against 0 in metadata
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = round(total_frames / fps, 2)
        sample_every = max(1, total_frames // max_samples)

        print(f"Video: {duration}s | {total_frames} frames @ {fps:.1f} FPS")

        frame_results = []
        frame_count = 0
        temp_dir = tempfile.gettempdir()

        try:
            while cap.isOpened() and len(frame_results) < max_samples:
                ret, frame = cap.read()
                if not ret:
                    break

                if frame_count % sample_every == 0:
                    # The image detector takes a file path, so dump the frame
                    # to a uniquely named temp file and clean it up after.
                    temp_path = os.path.join(temp_dir, f"sentinel_{uuid.uuid4()}.jpg")
                    cv2.imwrite(temp_path, frame)
                    try:
                        timestamp = round(frame_count / fps, 2)
                        result = self.image_detector.detect(temp_path)
                        result["timestamp_seconds"] = timestamp
                        result["frame_number"] = frame_count
                        frame_results.append(result)
                        print(f" Frame {len(frame_results)}/{max_samples} @ {timestamp}s → {result['verdict']}")
                    except Exception as e:
                        # One bad frame shouldn't abort the whole scan.
                        print(f" Frame {frame_count} failed: {e}")
                    finally:
                        if os.path.exists(temp_path):
                            os.remove(temp_path)

                frame_count += 1
        finally:
            # Release the capture even if an unexpected error escapes the loop.
            cap.release()

        if not frame_results:
            raise RuntimeError("No frames could be analyzed.")

        return self._aggregate(frame_results, duration)

    def _aggregate(self, frame_results: list, duration: float) -> dict:
        """Combine per-frame detections into one video-level verdict dict."""
        fake_frames = [r for r in frame_results if r["verdict"] == "DEEPFAKE"]
        fake_ratio = len(fake_frames) / len(frame_results)
        avg_fake_prob = sum(r["fake_probability"] for r in frame_results) / len(frame_results)
        avg_conf = sum(r["confidence"] for r in frame_results) / len(frame_results)
        worst = max(frame_results, key=lambda x: x.get("fake_probability", 0))

        # Both conditions must be true to call DEEPFAKE: a clear majority of
        # fake frames AND a high average fake probability.
        is_deepfake = fake_ratio >= 0.6 and avg_fake_prob >= 70.0

        return {
            "verdict": "DEEPFAKE" if is_deepfake else "AUTHENTIC",
            "confidence": round(avg_conf, 2),
            "fake_probability": round(avg_fake_prob, 2),
            "real_probability": round(100 - avg_fake_prob, 2),
            "severity": self._severity(avg_fake_prob / 100),
            "detection_method": "frame_by_frame",
            "model_used": "sightengine-deepfake-genai",
            "frames_analyzed": len(frame_results),
            "fake_frames_count": len(fake_frames),
            "real_frames_count": len(frame_results) - len(fake_frames),
            "duration_seconds": duration,
            "most_suspicious_timestamp": worst.get("timestamp_seconds", 0),
            "timeline": frame_results,
        }

    @staticmethod
    def _get_duration(video_path: str) -> float:
        """Return the video duration in seconds (0.0 if it cannot be read)."""
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
        frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        cap.release()
        return round(frames / fps, 2)

    @staticmethod
    def _severity(score: float) -> str:
        """Map a 0–1 fake score onto a severity label."""
        if score >= 0.90:
            return "CRITICAL"
        if score >= 0.75:
            return "HIGH"
        if score >= 0.50:
            return "MEDIUM"
        return "LOW"