Spaces:

RishiXD
/

IndiaNext-Hackathon

Sleeping

App Files Files Community

IndiaNext-Hackathon / explainer.py

RishiXD

Upload 3 files

dc085f2 verified 13 days ago

raw

history blame contribute delete

5.16 kB

	# explainer.py
	# Generates forensic explanations using HuggingFace chat router
	# Falls back to a structured template if API fails

	import os
	import json
	from openai import OpenAI
	from dotenv import load_dotenv

	# Load variables from a local .env file (if present) into the process
	# environment so that the HF_TOKEN lookup in _get_client() can see them.
	load_dotenv()

	# Module-level cache for the chat client; stays None until _get_client()
	# builds the client on first use.
	_client = None


	def _get_client() -> OpenAI:
	    """Return the shared chat client, creating it on the first call.

	    The client is stored in the module-level ``_client`` and reused by every
	    later call. It targets the HuggingFace router endpoint and authenticates
	    with the ``HF_TOKEN`` environment variable.

	    Raises:
	        RuntimeError: if ``HF_TOKEN`` is unset or empty when the client has
	            to be created.
	    """
	    global _client

	    # Fast path: a client was already built by an earlier call.
	    if _client is not None:
	        return _client

	    hf_token = os.environ.get("HF_TOKEN")
	    if not hf_token:
	        raise RuntimeError("HF_TOKEN not set in .env file")

	    _client = OpenAI(
	        api_key=hf_token,
	        base_url="https://router.huggingface.co/v1",
	    )
	    return _client


	def explain_detection(detection: dict, input_type: str) -> dict:
	    """
	    Produce the forensic explanation for a single detection result.

	    The LLM-backed explainer is tried first. If it raises for any reason,
	    the failure is printed and the static fallback template built from
	    ``detection`` is returned instead, so callers always get a result dict
	    (technical_signals, plain_english, etc.).
	    """
	    try:
	        explanation = _call_llm(detection, input_type)
	    except Exception as err:
	        # Best-effort boundary: report the failure and degrade to the template.
	        print(f" ⚠️ Explainer LLM failed ({err}), using fallback template.")
	        explanation = _fallback(detection)
	    return explanation


	def _extract_json_object(text: str) -> dict:
	    """Parse the first JSON object embedded in *text*.

	    Parsing starts at the first ``{`` and stops at the end of that object, so
	    markdown code fences (with any language tag), leading commentary and
	    trailing text around the object are all ignored.

	    Raises:
	        ValueError: if *text* contains no ``{`` or the text starting there is
	            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
	    """
	    start = text.find("{")
	    if start == -1:
	        raise ValueError("LLM response contains no JSON object")
	    # A JSON value that begins with "{" is always an object, so the decoded
	    # result is a dict. raw_decode (unlike json.loads) tolerates trailing data.
	    parsed, _end = json.JSONDecoder().raw_decode(text, start)
	    return parsed


	def _call_llm(detection: dict, input_type: str) -> dict:
	    """Ask the chat model for a structured explanation of *detection*.

	    Builds a prompt from the detection's ``verdict``, ``confidence`` and
	    ``severity`` (plus frame statistics when ``input_type`` is ``"video"``),
	    sends it through the client returned by ``_get_client()`` and parses the
	    JSON object found in the reply.

	    Args:
	        detection: Detection result; missing keys fall back to
	            ``"UNKNOWN"`` / ``0`` / ``"LOW"`` / ``"N/A"`` in the prompt.
	        input_type: Kind of media analysed; ``"video"`` adds extra fields.

	    Returns:
	        The dict decoded from the model's JSON reply.

	    Raises:
	        ValueError: if the reply is empty or contains no parseable JSON object.
	        Exception: anything raised by client creation or the API request is
	            propagated unchanged (the caller is expected to fall back).
	    """
	    verdict = detection.get("verdict", "UNKNOWN")
	    confidence = detection.get("confidence", 0)
	    severity = detection.get("severity", "LOW")
	    extra = ""

	    if input_type == "video":
	        extra = f"""
	- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
	- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
	- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""

	    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.

	Detection result:
	- Input type: {input_type}
	- Verdict: {verdict}
	- Confidence: {confidence}%
	- Severity: {severity}{extra}

	Return ONLY a valid JSON object — no markdown, no explanation, no extra text.

	{{
	"technical_signals": [
	"specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
	"specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
	"specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
	],
	"plain_english": "2 clear sentences explaining this to a non-technical person.",
	"manipulation_areas": ["facial region 1", "facial region 2"],
	"recommended_action": "One specific action the user should take right now.",
	"mitre_technique": "T1565.001 - Stored Data Manipulation"
	}}

	Rules:
	- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
	- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
	- Be specific. Never use generic phrases like "image looks suspicious"."""

	    client = _get_client()
	    completion = client.chat.completions.create(
	        model="mistralai/Mistral-7B-Instruct-v0.3",
	        messages=[{"role": "user", "content": prompt}],
	        max_tokens=600,
	        temperature=0.3,  # Lower temp = more consistent JSON output
	    )

	    # The message content can be None/empty; fail with a clear error instead
	    # of an AttributeError on .strip().
	    content = completion.choices[0].message.content
	    if not content:
	        raise ValueError("LLM returned an empty response")

	    raw = content.strip()
	    print(f" 🤖 Explainer raw output: {raw[:100]}...")

	    # Models often ignore the "JSON only" instruction and wrap the object in
	    # code fences or prose, so extract the object rather than parsing verbatim.
	    return _extract_json_object(raw)


	def _fallback(detection: dict) -> dict:
	    """Build a template explanation when the LLM is unavailable.

	    Args:
	        detection: Detection result. Only ``verdict`` and ``confidence`` are
	            read; ``confidence`` defaults to ``0``.

	    Returns:
	        A dict with the keys ``technical_signals``, ``plain_english``,
	        ``manipulation_areas`` (always empty), ``recommended_action`` and
	        ``mitre_technique``. The wording depends on the verdict:

	        * ``DEEPFAKE`` -> manipulation template,
	        * missing / empty / ``UNKNOWN`` -> inconclusive template,
	        * anything else -> authentic template.
	    """
	    # Normalise so a missing or None verdict becomes "UNKNOWN" and so that
	    # case/whitespace differences (e.g. "deepfake") cannot turn a deepfake
	    # verdict into an "authentic" explanation.
	    verdict = str(detection.get("verdict") or "UNKNOWN").strip().upper()
	    confidence = detection.get("confidence", 0)

	    if verdict == "DEEPFAKE":
	        signals = [
	            f"Model confidence {confidence}% indicates high likelihood of synthetic generation",
	            "GAN-based artifacts detected in facial texture regions",
	            "Boundary blending inconsistencies identified near facial edges",
	        ]
	        plain = (
	            f"This content appears to be AI-generated or manipulated with {confidence}% confidence. "
	            "It shows technical patterns characteristic of deepfake generation tools."
	        )
	        action = "Do not share or use this content. Verify the original source independently."
	    elif verdict == "UNKNOWN":
	        # Without a verdict we must not claim the content is authentic.
	        signals = [
	            "No verdict was produced by the detection model",
	            "Forensic signals could not be evaluated for this input",
	        ]
	        plain = (
	            "The analysis could not determine whether this content is authentic or manipulated. "
	            "Treat it as unverified until it has been analyzed again."
	        )
	        action = "Re-run the analysis or verify the content through an independent source before trusting it."
	    else:
	        signals = [
	            f"Authenticity confidence: {confidence}%",
	            "Natural noise distribution consistent with real camera capture",
	            "No GAN fingerprint patterns detected",
	        ]
	        plain = (
	            f"This content appears authentic with {confidence}% confidence. "
	            "No deepfake manipulation signatures were detected."
	        )
	        action = "Content appears authentic. Standard verification still recommended for sensitive use cases."

	    return {
	        "technical_signals": signals,
	        "plain_english": plain,
	        "manipulation_areas": [],
	        "recommended_action": action,
	        "mitre_technique": "T1565.001 - Stored Data Manipulation",
	    }