File size: 5,164 Bytes
dc085f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# explainer.py
# Generates forensic explanations using HuggingFace chat router
# Falls back to a structured template if API fails

import os
import json
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

_client = None


def _get_client() -> OpenAI:
    """Return the shared HuggingFace-router client, creating it on first use.

    Reads the HF_TOKEN environment variable (loaded from .env at import
    time) and caches the resulting client in the module-level ``_client``.

    Raises:
        RuntimeError: if HF_TOKEN is not set.
    """
    global _client
    if _client is not None:
        return _client
    token = os.getenv("HF_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN not set in .env file")
    _client = OpenAI(
        base_url="https://router.huggingface.co/v1",
        api_key=token,
    )
    return _client


def explain_detection(detection: dict, input_type: str) -> dict:
    """
    Generate a three-audience forensic explanation for a detection result.

    Attempts the LLM path first; on any failure (network, auth, bad JSON)
    logs a warning and returns the structured fallback template instead,
    so callers always receive a well-formed explanation dict.
    """
    try:
        result = _call_llm(detection, input_type)
    except Exception as e:
        # Never let an explainer failure break the detection pipeline.
        print(f"  ⚠️  Explainer LLM failed ({e}), using fallback template.")
        result = _fallback(detection)
    return result


def _call_llm(detection: dict, input_type: str) -> dict:
    """
    Ask the HF-routed chat model for a structured forensic explanation.

    Builds a strict JSON-only prompt from the detection dict, sends it to
    the Mistral instruct model, and parses the reply into a dict.

    Raises:
        Any network/API error from the client, or json.JSONDecodeError if
        the reply cannot be parsed — the caller falls back to a template.
    """
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)
    severity = detection.get("severity", "LOW")
    extra = ""

    if input_type == "video":
        extra = f"""
- Fake frame ratio: {detection.get('fake_probability', 'N/A')}%
- Frames analyzed: {detection.get('frames_analyzed', 'N/A')}
- Most suspicious timestamp: {detection.get('most_suspicious_timestamp', 'N/A')}s"""

    prompt = f"""You are a deepfake forensics expert for SENTINEL, an AI-powered cybersecurity platform.

Detection result:
- Input type: {input_type}
- Verdict: {verdict}
- Confidence: {confidence}%
- Severity: {severity}{extra}

Return ONLY a valid JSON object — no markdown, no explanation, no extra text.

{{
  "technical_signals": [
    "specific forensic artifact 1 (e.g. GAN grid pattern at 512px boundary)",
    "specific forensic artifact 2 (e.g. facial blending seam visible at jaw line)",
    "specific forensic artifact 3 (e.g. unnatural specular reflection in left eye)"
  ],
  "plain_english": "2 clear sentences explaining this to a non-technical person.",
  "manipulation_areas": ["facial region 1", "facial region 2"],
  "recommended_action": "One specific action the user should take right now.",
  "mitre_technique": "T1565.001 - Stored Data Manipulation"
}}

Rules:
- If DEEPFAKE: name real GAN artifacts — boundary blending, texture inconsistency, eye reflection anomalies, lighting direction mismatch, hair edge artifacts, temporal flickering.
- If AUTHENTIC: name the positive signals — consistent EXIF metadata, natural skin texture variance, coherent lighting, authentic noise patterns.
- Be specific. Never use generic phrases like "image looks suspicious"."""

    client = _get_client()
    completion = client.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.3",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=600,
        temperature=0.3,  # Lower temp = more consistent JSON output
    )

    raw = completion.choices[0].message.content.strip()
    print(f"  🤖 Explainer raw output: {raw[:100]}...")

    return json.loads(_extract_json(raw))


def _extract_json(raw: str) -> str:
    """Best-effort extraction of a JSON object from an LLM reply.

    Handles two common failure modes of "JSON-only" prompting:
    1. The object wrapped in markdown code fences (``` or ```json).
    2. Prose before/after the object — sliced away by taking the span from
       the first '{' to the last '}'.

    Returns the candidate substring; callers still validate via json.loads.
    """
    if "```" in raw:
        for part in raw.split("```"):
            part = part.strip()
            if part.startswith("json"):
                part = part[4:].strip()
            if part.startswith("{"):
                raw = part
                break
    # Trim any surrounding prose: keep the outermost brace-delimited span.
    start = raw.find("{")
    end = raw.rfind("}")
    if start != -1 and end > start:
        raw = raw[start:end + 1]
    return raw


def _fallback(detection: dict) -> dict:
    """Structured fallback when LLM is unavailable."""
    verdict = detection.get("verdict", "UNKNOWN")
    confidence = detection.get("confidence", 0)

    if verdict == "DEEPFAKE":
        signals = [
            f"Model confidence {confidence}% indicates high likelihood of synthetic generation",
            "GAN-based artifacts detected in facial texture regions",
            "Boundary blending inconsistencies identified near facial edges",
        ]
        plain = (
            f"This content appears to be AI-generated or manipulated with {confidence}% confidence. "
            "It shows technical patterns characteristic of deepfake generation tools."
        )
        action = "Do not share or use this content. Verify the original source independently."
    else:
        signals = [
            f"Authenticity confidence: {confidence}%",
            "Natural noise distribution consistent with real camera capture",
            "No GAN fingerprint patterns detected",
        ]
        plain = (
            f"This content appears authentic with {confidence}% confidence. "
            "No deepfake manipulation signatures were detected."
        )
        action = "Content appears authentic. Standard verification still recommended for sensitive use cases."

    return {
        "technical_signals": signals,
        "plain_english": plain,
        "manipulation_areas": [],
        "recommended_action": action,
        "mitre_technique": "T1565.001 - Stored Data Manipulation",
    }