File size: 5,164 Bytes
dc085f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# explainer.py
# Generates forensic explanations using HuggingFace chat router
# Falls back to a structured template if API fails

import os
import json
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

_client = None


def _get_client() -> OpenAI:
    """Return the shared HuggingFace-router client, creating it on first use.

    Reads the HF_TOKEN environment variable (loaded from .env at import
    time) and caches the resulting client in the module-level ``_client``.

    Raises:
        RuntimeError: if HF_TOKEN is not set.
    """
    global _client
    if _client is not None:
        return _client
    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set in .env file")
    _client = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=token,
    )
    return _client


def explain_detection(detection: dict, input_type: str) -> dict:
    """
    Generate a three-audience forensic explanation for a detection result.

    Attempts the LLM path first; on any failure (network, auth, bad JSON)
    logs a warning and returns the structured fallback template instead,
    so callers always receive a well-formed explanation dict.
    """
    try:
        result = _call_llm(detection, input_type)
    except Exception as e:
        # Never let an explainer failure break the detection pipeline.
        print(f"  ⚠️  Explainer LLM failed ({e}), using fallback template.")
        result = _fallback(detection)
    return result


def _call_llm(detection: dict, input_type: str) -> dict:
    """
    Ask the HF-routed chat model for a structured forensic explanation.

    Builds a strict JSON-only prompt from the detection dict, sends it to
    the Mistral instruct model, and parses the reply into a dict.

    Raises:
        Any network/API error from the client, or json.JSONDecodeError if
        the reply cannot be parsed — the caller falls back to a template.
    """
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)
    severity = detection.get("severity", "LOW")
    extra = ""

    if input_type == "video":
        extra = f"""
- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""

    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.

Detection result:
- Input type: {input_type}
- Verdict: {verdict}
- Confidence: {confidence}%
- Severity: {severity}{extra}

Return ONLY a valid JSON object — no markdown, no explanation, no extra text.

{{
  "technical_signals": [
    "specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
    "specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
    "specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
  ],
  "plain_english": "2 clear sentences explaining this to a non-technical person.",
  "manipulation_areas": ["facial region 1", "facial region 2"],
  "recommended_action": "One specific action the user should take right now.",
  "mitre_technique": "T1565.001 - Stored Data Manipulation"
}}

Rules:
- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
- Be specific. Never use generic phrases like "image looks suspicious"."""

    client = _get_client()
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3,  # Lower temp = more consistent JSON output
    )

    raw = completion.choices[0].message.content.strip()
    print(f"  🤖 Explainer raw output: {raw[:100]}...")

    return json.loads(_extract_json(raw))


def _extract_json(raw: str) -> str:
    """Best-effort extraction of a JSON object from an LLM reply.

    Handles two common failure modes of "JSON-only" prompting:
    1. The object wrapped in markdown code fences (``` or ```json).
    2. Prose before/after the object — sliced away by taking the span from
       the first '{' to the last '}'.

    Returns the candidate substring; callers still validate via json.loads.
    """
    if "```" in raw:
        for part in raw.split("```"):
            part = part.strip()
            if part.startswith("json"):
                part = part[4:].strip()
            if part.startswith("{"):
                raw = part
                break
    # Trim any surrounding prose: keep the outermost brace-delimited span.
    start = raw.find("{")
    end = raw.rfind("}")
    if start != -1 and end > start:
        raw = raw[start:end + 1]
    return raw


def _fallback(detection: dict) -> dict:
    """Structured fallback when LLM is unavailable."""
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)

    if verdict == "DEEPFAKE":
        signals = [
            f"Model confidence {confidence}% indicates high likelihood of synthetic generation",
            "GAN-based artifacts detected in facial texture regions",
            "Boundary blending inconsistencies identified near facial edges",
        ]
        plain = (
            f"This content appears to be AI-generated or manipulated with {confidence}% confidence. "
            "It shows technical patterns characteristic of deepfake generation tools."
        )
        action = "Do not share or use this content. Verify the original source independently."
    else:
        signals = [
            f"Authenticity confidence: {confidence}%",
            "Natural noise distribution consistent with real camera capture",
            "No GAN fingerprint patterns detected",
        ]
        plain = (
            f"This content appears authentic with {confidence}% confidence. "
            "No deepfake manipulation signatures were detected."
        )
        action = "Content appears authentic. Standard verification still recommended for sensitive use cases."

    return {
        "technical_signals": signals,
        "plain_english": plain,
        "manipulation_areas": [],
        "recommended_action": action,
        "mitre_technique": "T1565.001 - Stored Data Manipulation",
    }