IndiaNext-Hackathon / explainer.py
RishiXD's picture
Upload 3 files
dc085f2 verified
# explainer.py
# Generates forensic explanations using HuggingFace chat router
# Falls back to a structured template if API fails
import os
import json
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
_client = None


def _get_client() -> OpenAI:
    """Return the shared chat client, building it on the first call.

    The client targets the Hugging Face router endpoint and authenticates
    with the ``HF_TOKEN`` environment variable. Once built it is cached in
    the module-level ``_client`` and reused by later calls.

    Raises:
        RuntimeError: if ``HF_TOKEN`` is unset or empty.
    """
    global _client

    # Reuse the cached client when one has already been built.
    if _client is not None:
        return _client

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise RuntimeError("HF_TOKEN not set in .env file")

    _client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=hf_token)
    return _client
def explain_detection(detection: dict, input_type: str) -> dict:
    """Produce a forensic explanation for a detection result.

    The LLM-backed explainer is tried first. If it raises any exception,
    the failure is printed and the template-based explanation built from
    ``detection`` alone is returned instead, so this function does not
    propagate explainer errors.

    Args:
        detection: Detection result passed through to the explainer.
        input_type: Kind of input that was analysed (forwarded to the LLM prompt).

    Returns:
        The explanation dict from the LLM, or the fallback template dict.
    """
    try:
        explanation = _call_llm(detection, input_type)
    except Exception as e:
        # Best-effort boundary: report the failure and degrade to the template.
        print(f" ⚠️ Explainer LLM failed ({e}), using fallback template.")
        explanation = _fallback(detection)
    return explanation
def _extract_json_object(raw: str) -> dict:
    """Decode the JSON object contained in an LLM reply.

    Accepts a bare object, an object wrapped in a markdown code fence
    (optionally tagged ``json``), or an object surrounded by stray prose.

    Raises:
        ValueError: if no JSON object can be decoded from ``raw``
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    text = raw.strip()

    # Prefer the contents of a markdown code fence when one holds the object.
    if "```" in text:
        for chunk in text.split("```"):
            chunk = chunk.strip()
            if chunk[:4].lower() == "json":
                chunk = chunk[4:].strip()
            if chunk.startswith("{"):
                text = chunk
                break

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The model added prose around the object: decode from the first
        # opening brace and ignore whatever trails the object.
        start = text.find("{")
        if start == -1:
            raise
        parsed, _ = json.JSONDecoder().raw_decode(text[start:])

    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object from the LLM, got {type(parsed).__name__}"
        )
    return parsed


def _call_llm(detection: dict, input_type: str) -> dict:
    """Ask the hosted chat model for a forensic explanation and parse its reply.

    Args:
        detection: Detection result. Reads "verdict", "confidence" and
            "severity"; for video input also "fake_probability",
            "frames_analyzed" and "most_suspicious_timestamp".
        input_type: Kind of input analysed; "video" adds the frame
            statistics to the prompt.

    Returns:
        The JSON object decoded from the model's reply.

    Raises:
        RuntimeError: propagated from ``_get_client()`` when HF_TOKEN is missing.
        ValueError: if the reply is empty or holds no decodable JSON object.
        Exception: errors raised by the API call itself propagate unchanged.
    """
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)
    severity = detection.get("severity", "LOW")

    # Video detections carry extra per-frame statistics worth showing the model.
    extra = ""
    if input_type == "video":
        extra = f"""
- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""

    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.
Detection result:
- Input type: {input_type}
- Verdict: {verdict}
- Confidence: {confidence}%
- Severity: {severity}{extra}
Return ONLY a valid JSON object — no markdown, no explanation, no extra text.
{{
"technical_signals": [
"specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
"specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
"specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
],
"plain_english": "2 clear sentences explaining this to a non-technical person.",
"manipulation_areas": ["facial region 1", "facial region 2"],
"recommended_action": "One specific action the user should take right now.",
"mitre_technique": "T1565.001 - Stored Data Manipulation"
}}
Rules:
- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
- Be specific. Never use generic phrases like "image looks suspicious"."""

    client = _get_client()
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3,  # Lower temp = more consistent JSON output
    )

    # The API may return a message with no text content; fail with a clear
    # error instead of an AttributeError on None.
    content = completion.choices[0].message.content
    if not content or not content.strip():
        raise ValueError("LLM returned an empty response")
    raw = content.strip()
    print(f" 🤖 Explainer raw output: {raw[:100]}...")

    return _extract_json_object(raw)
def _fallback(detection: dict) -> dict:
    """Build a template explanation when the LLM is unavailable.

    Args:
        detection: Detection result. Reads "verdict" (treated as "UNKNOWN"
            when missing or empty) and "confidence" (default 0).

    Returns:
        A dict with the keys "technical_signals", "plain_english",
        "manipulation_areas", "recommended_action" and "mitre_technique".
        The wording depends on the verdict: "DEEPFAKE" yields the
        manipulation template, "UNKNOWN" yields an inconclusive template,
        and any other verdict yields the authentic template.
    """
    # Normalise so that a missing verdict or a differently-cased one does not
    # silently fall through to the "authentic" wording.
    verdict = str(detection.get("verdict") or "UNKNOWN").strip().upper()
    confidence = detection.get("confidence", 0)

    if verdict == "DEEPFAKE":
        signals = [
            f"Model confidence {confidence}% indicates high likelihood of synthetic generation",
            "GAN-based artifacts detected in facial texture regions",
            "Boundary blending inconsistencies identified near facial edges",
        ]
        plain = (
            f"This content appears to be AI-generated or manipulated with {confidence}% confidence. "
            "It shows technical patterns characteristic of deepfake generation tools."
        )
        action = "Do not share or use this content. Verify the original source independently."
    elif verdict == "UNKNOWN":
        # No verdict available: never claim the content is authentic.
        signals = [
            "No verdict was provided by the detection model",
            f"Reported confidence: {confidence}%",
            "Forensic signals could not be evaluated for this content",
        ]
        plain = (
            "The analysis did not produce a verdict for this content. "
            "Whether it is genuine or manipulated could not be determined."
        )
        action = "Re-run the analysis or verify the content through an independent source."
    else:
        signals = [
            f"Authenticity confidence: {confidence}%",
            "Natural noise distribution consistent with real camera capture",
            "No GAN fingerprint patterns detected",
        ]
        plain = (
            f"This content appears authentic with {confidence}% confidence. "
            "No deepfake manipulation signatures were detected."
        )
        action = "Content appears authentic. Standard verification still recommended for sensitive use cases."

    return {
        "technical_signals": signals,
        "plain_english": plain,
        "manipulation_areas": [],
        "recommended_action": action,
        "mitre_technique": "T1565.001 - Stored Data Manipulation",
    }