Spaces:
Sleeping
Sleeping
# explainer.py
# Generates forensic explanations using HuggingFace chat router
# Falls back to a structured template if API fails

import os
import json
from openai import OpenAI
from dotenv import load_dotenv

# Pull HF_TOKEN (and any other settings) from a local .env file at import time.
load_dotenv()

# Module-level singleton chat client; created lazily by _get_client() so that
# importing this module never fails just because HF_TOKEN is missing.
_client = None
def _get_client() -> OpenAI:
    """Lazy-init the HF chat client.

    Creates the OpenAI-compatible client pointed at the HuggingFace router
    on first use and caches it in the module-level ``_client`` singleton.

    Raises:
        RuntimeError: if HF_TOKEN is not present in the environment/.env.
    """
    global _client
    # Fast path: client already built on a previous call.
    if _client is not None:
        return _client

    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set in .env file")

    _client = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=token,
    )
    return _client
def explain_detection(detection: dict, input_type: str) -> dict:
    """
    Generate a three-audience forensic explanation for a detection result.

    Args:
        detection: Detector output; keys read downstream include "verdict",
            "confidence", "severity" and (for video) frame-level fields.
        input_type: Kind of media analyzed, e.g. "image" or "video".

    Returns:
        dict with technical_signals, plain_english, manipulation_areas,
        recommended_action and mitre_technique.

    Falls back gracefully to a static template if the LLM call fails.
    """
    try:
        return _call_llm(detection, input_type)
    except Exception as e:
        # Broad catch is deliberate: any network/API/JSON-parse failure must
        # degrade to the template rather than crash the caller.
        # Fixed mojibake in the log prefix ("β οΈ" -> "⚠️").
        print(f" ⚠️ Explainer LLM failed ({e}), using fallback template.")
        return _fallback(detection)
def _call_llm(detection: dict, input_type: str) -> dict:
    """Ask the HF-hosted Mistral model for a structured JSON explanation.

    Builds a forensic-expert prompt from the detection result, requests a
    JSON-only completion, strips any markdown code fences the model adds
    anyway, and parses the result.

    Raises:
        Any exception from the API call or ``json.loads`` — the caller
        (explain_detection) is responsible for falling back.
    """
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)
    severity = detection.get("severity", "LOW")

    # Video detections carry extra frame-level context worth surfacing.
    extra = ""
    if input_type == "video":
        extra = f"""
- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""

    # NOTE(review): mojibake dashes ("β") in the original prompt text were
    # repaired to em dashes; wording is otherwise unchanged.
    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.
Detection result:
- Input type: {input_type}
- Verdict: {verdict}
- Confidence: {confidence}%
- Severity: {severity}{extra}
Return ONLY a valid JSON object — no markdown, no explanation, no extra text.
{{
  "technical_signals": [
    "specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
    "specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
    "specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
  ],
  "plain_english": "2 clear sentences explaining this to a non-technical person.",
  "manipulation_areas": ["facial region 1", "facial region 2"],
  "recommended_action": "One specific action the user should take right now.",
  "mitre_technique": "T1565.001 - Stored Data Manipulation"
}}
Rules:
- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
- Be specific. Never use generic phrases like "image looks suspicious"."""

    client = _get_client()
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3,  # Lower temp = more consistent JSON output
    )
    raw = completion.choices[0].message.content.strip()
    # Fixed mojibake in the log prefix ("π€" -> "🤖").
    print(f" 🤖 Explainer raw output: {raw[:100]}...")

    # Strip markdown code fences if present: take the first fenced part that
    # (after dropping an optional "json" language tag) starts with "{".
    if "```" in raw:
        parts = raw.split("```")
        for part in parts:
            part = part.strip()
            if part.startswith("json"):
                part = part[4:].strip()
            if part.startswith("{"):
                raw = part
                break
    return json.loads(raw)
| def _fallback(detection: dict) -> dict: | |
| """Structured fallback when LLM is unavailable.""" | |
| verdict = detection.get("verdict", "UNKNOWN") | |
| confidence = detection.get("confidence", 0) | |
| if verdict == "DEEPFAKE": | |
| signals = [ | |
| f"Model confidence {confidence}% indicates high likelihood of synthetic generation", | |
| "GAN-based artifacts detected in facial texture regions", | |
| "Boundary blending inconsistencies identified near facial edges", | |
| ] | |
| plain = ( | |
| f"This content appears to be AI-generated or manipulated with {confidence}% confidence. " | |
| "It shows technical patterns characteristic of deepfake generation tools." | |
| ) | |
| action = "Do not share or use this content. Verify the original source independently." | |
| else: | |
| signals = [ | |
| f"Authenticity confidence: {confidence}%", | |
| "Natural noise distribution consistent with real camera capture", | |
| "No GAN fingerprint patterns detected", | |
| ] | |
| plain = ( | |
| f"This content appears authentic with {confidence}% confidence. " | |
| "No deepfake manipulation signatures were detected." | |
| ) | |
| action = "Content appears authentic. Standard verification still recommended for sensitive use cases." | |
| return { | |
| "technical_signals": signals, | |
| "plain_english": plain, | |
| "manipulation_areas": [], | |
| "recommended_action": action, | |
| "mitre_technique": "T1565.001 - Stored Data Manipulation", | |
| } |