# explainer.py
# Generates forensic explanations using HuggingFace chat router
# Falls back to a structured template if API fails
import os
import json
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
_client = None
def _get_client() -> OpenAI:
    """Return the module-wide HF chat client, creating it on first use.

    Raises:
        RuntimeError: if HF_TOKEN is missing from the environment/.env.
    """
    global _client
    if _client is not None:
        return _client

    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set in .env file")

    # One shared client for the whole process; the HF router speaks the
    # OpenAI-compatible API, hence the custom base_url.
    _client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=token)
    return _client
def explain_detection(detection: dict, input_type: str) -> dict:
    """
    Generate a three-audience forensic explanation for a detection result.

    Returns a dict with technical_signals, plain_english, etc.
    Falls back gracefully if the LLM call fails.
    """
    try:
        explanation = _call_llm(detection, input_type)
    except Exception as e:
        # Any failure (network, auth, bad JSON) degrades to the static template.
        print(f" ⚠️ Explainer LLM failed ({e}), using fallback template.")
        explanation = _fallback(detection)
    return explanation
def _call_llm(detection: dict, input_type: str) -> dict:
    """Ask the HF-routed chat model for a structured forensic explanation.

    Builds a prompt from the detection result, requests a JSON-only reply,
    and parses it into a dict.

    Raises on any transport/auth failure or unparseable model output; the
    caller (explain_detection) is responsible for the fallback path.
    """
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)
    severity = detection.get("severity", "LOW")
    extra = ""
    if input_type == "video":
        extra = f"""
- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""
    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.
Detection result:
- Input type: {input_type}
- Verdict: {verdict}
- Confidence: {confidence}%
- Severity: {severity}{extra}
Return ONLY a valid JSON object — no markdown, no explanation, no extra text.
{{
"technical_signals": [
"specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
"specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
"specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
],
"plain_english": "2 clear sentences explaining this to a non-technical person.",
"manipulation_areas": ["facial region 1", "facial region 2"],
"recommended_action": "One specific action the user should take right now.",
"mitre_technique": "T1565.001 - Stored Data Manipulation"
}}
Rules:
- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
- Be specific. Never use generic phrases like "image looks suspicious"."""
    client = _get_client()
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3,  # Lower temp = more consistent JSON output
    )
    raw = completion.choices[0].message.content.strip()
    print(f" 🤖 Explainer raw output: {raw[:100]}...")
    return _extract_json(raw)


def _extract_json(raw: str) -> dict:
    """Parse a JSON object out of model output.

    Tolerates ``` / ```json code fences and (new) JSON embedded in unfenced
    prose; raises json.JSONDecodeError when no object can be recovered.
    """
    # Strip markdown code fences if present
    if "```" in raw:
        for part in raw.split("```"):
            part = part.strip()
            if part.startswith("json"):
                part = part[4:].strip()
            if part.startswith("{"):
                raw = part
                break
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Fix: models sometimes wrap the JSON in prose without fences
        # ("Here is the result: {...}"). Previously that raised and threw the
        # answer away; try the outermost {...} slice before giving up.
        start, end = raw.find("{"), raw.rfind("}")
        if start != -1 and end > start:
            return json.loads(raw[start:end + 1])
        raise
def _fallback(detection: dict) -> dict:
"""Structured fallback when LLM is unavailable."""
verdict = detection.get("verdict", "UNKNOWN")
confidence = detection.get("confidence", 0)
if verdict == "DEEPFAKE":
signals = [
f"Model confidence {confidence}% indicates high likelihood of synthetic generation",
"GAN-based artifacts detected in facial texture regions",
"Boundary blending inconsistencies identified near facial edges",
]
plain = (
f"This content appears to be AI-generated or manipulated with {confidence}% confidence. "
"It shows technical patterns characteristic of deepfake generation tools."
)
action = "Do not share or use this content. Verify the original source independently."
else:
signals = [
f"Authenticity confidence: {confidence}%",
"Natural noise distribution consistent with real camera capture",
"No GAN fingerprint patterns detected",
]
plain = (
f"This content appears authentic with {confidence}% confidence. "
"No deepfake manipulation signatures were detected."
)
action = "Content appears authentic. Standard verification still recommended for sensitive use cases."
return {
"technical_signals": signals,
"plain_english": plain,
"manipulation_areas": [],
"recommended_action": action,
"mitre_technique": "T1565.001 - Stored Data Manipulation",
} |