genai-deepdetect / modules /m5_explain.py
akagtag's picture
Initial deploy: M1 SyncNet + M2 CLIP + M3 ViT + M5 Llama NIM
16d70ee verified
"""M5 Explain — NVIDIA NIM Llama-3.1-8B-Instruct."""
from __future__ import annotations

import logging
import os

from openai import OpenAI
class ExplainModule:
    """Turn deepfake-detection scores into a short natural-language explanation.

    The explanation is requested from an OpenAI-compatible chat-completions
    endpoint (NVIDIA NIM, model ``meta/llama-3.1-8b-instruct``). Whenever the
    remote call fails or yields no text, a deterministic template built from
    the same scores is returned instead, so ``explain`` always produces a
    usable string.

    NVIDIA NIM free tier: ~40 req/min.
    """

    # FakeScore values strictly above this cut-off yield the "FAKE" verdict.
    # The same value is reused when the fallback text decides whether an
    # individual module score counts as "below detection threshold".
    # NOTE(review): no separate per-module threshold is defined in this file;
    # confirm 0.5 is the intended one.
    _THRESHOLD = 0.5
    # Only the first few flagged segments / top generators go into the prompt.
    _MAX_SEGMENTS = 5
    _MAX_GENERATORS = 3

    _log = logging.getLogger(__name__)

    def __init__(self):
        """Create the API client; the key is read from ``NVIDIA_API_KEY``."""
        self.client = OpenAI(
            # A missing key becomes "" so construction does not depend on the
            # environment; a failing request is then handled in ``explain``.
            api_key=os.environ.get("NVIDIA_API_KEY", ""),
            base_url="https://integrate.api.nvidia.com/v1",
        )
        self.model = "meta/llama-3.1-8b-instruct"

    def explain(
        self,
        fakescore: float,
        s1: float,
        s2: float,
        s3: float,
        weights: dict,
        attribution: dict,
        segments: list,
        top_generator: str,
    ) -> str:
        """Return a 3-5 sentence explanation of a detection result.

        Args:
            fakescore: Combined score; values above 0.5 give the verdict
                "FAKE", anything else "REAL".
            s1: Lip-sync (M1) score.
            s2: Fingerprint (M2) score.
            s3: Temporal-GNN (M3) score.
            weights: Per-module weights looked up under the keys
                ``lip_sync``, ``fingerprint`` and ``graph_gnn``; missing keys
                are shown as "N/A". ``None`` is treated as an empty mapping.
            attribution: Mapping of generator name to a numeric share that is
                displayed multiplied by 100 as a percentage; the three
                largest entries are included in the prompt.
            segments: Sequence of mappings with ``time`` and ``score`` keys;
                the first five are listed in the prompt.
            top_generator: Name of the most likely generator.

        Returns:
            The model's answer with surrounding whitespace removed, or the
            template text from ``_fallback`` when the request raises or the
            answer is empty.
        """
        verdict = "FAKE" if fakescore > self._THRESHOLD else "REAL"
        conf = self._confidence(fakescore)
        prompt = self._build_prompt(
            verdict, conf, fakescore, s1, s2, s3,
            weights, attribution, segments, top_generator,
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a forensic deepfake analyst. Be precise and concise."},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=300,
                temperature=0.3,
            )
            # ``content`` may be None; normalise to "" so it is handled below
            # together with blank answers.
            text = (response.choices[0].message.content or "").strip()
        except Exception:
            # Deliberate best-effort boundary: any failure of the remote call
            # (auth, rate limit, network, unexpected payload) must not break
            # the caller, but it is logged instead of silently discarded.
            self._log.warning(
                "Explanation request failed; using template fallback.",
                exc_info=True,
            )
            text = ""
        else:
            if not text:
                self._log.warning(
                    "Explanation request returned no text; using template fallback."
                )

        if text:
            return text
        return self._fallback(verdict, fakescore, s1, s2, s3, top_generator, conf)

    def _confidence(self, fakescore) -> str:
        """Map the distance of ``fakescore`` from the cut-off to a label."""
        margin = abs(fakescore - self._THRESHOLD)
        if margin > 0.3:
            return "high"
        if margin > 0.15:
            return "moderate"
        return "low"

    def _build_prompt(
        self, verdict, conf, fakescore, s1, s2, s3,
        weights, attribution, segments, top_generator,
    ) -> str:
        """Assemble the user prompt sent to the chat model."""
        weights = weights or {}

        seg_text = ""
        if segments:
            seg_text = "Flagged timestamps: " + ", ".join(
                f"{s['time']}s (score={s['score']})"
                for s in segments[: self._MAX_SEGMENTS]
            )

        attr_text = ""
        if attribution:
            ranked = sorted(attribution.items(), key=lambda item: -item[1])
            attr_text = "Top generators: " + ", ".join(
                f"{name}: {share * 100:.1f}%"
                for name, share in ranked[: self._MAX_GENERATORS]
            )

        # Empty seg_text / attr_text are kept as (blank) lines on purpose so
        # the prompt layout is unchanged from the previous implementation.
        lines = [
            "You are a forensic AI analyst. Analyze these deepfake detection results. Be specific.",
            "Results:",
            f"- Verdict: {verdict} (FakeScore: {fakescore:.3f}, confidence: {conf})",
            f"- Lip-Sync (M1): {s1:.3f} (weight: {weights.get('lip_sync', 'N/A')})",
            f"- Fingerprint (M2): {s2:.3f} (weight: {weights.get('fingerprint', 'N/A')})",
            f"- Temporal-GNN (M3): {s3:.3f} (weight: {weights.get('graph_gnn', 'N/A')})",
            seg_text,
            attr_text,
            f"- Most likely generator: {top_generator}",
            "Write 3-5 sentences referencing specific scores and timestamps.",
        ]
        return "\n".join(lines)

    def _fallback(self, verdict, fakescore, s1, s2, s3, top_gen, conf) -> str:
        """Build a template explanation without calling the model.

        Args:
            verdict: "FAKE" or "REAL".
            fakescore: Combined score shown with three decimals.
            s1: Lip-sync score.
            s2: Fingerprint score.
            s3: Temporal-graph score.
            top_gen: Most likely generator; omitted from the text when empty.
            conf: Confidence label ("high", "moderate" or "low").

        Returns:
            A one-paragraph summary. For a "REAL" verdict the text only
            claims that all modules were below threshold when that is true
            for ``s1``, ``s2`` and ``s3``; otherwise the individual scores
            are reported.
        """
        headline = (
            f"Video classified as {verdict} with {conf} confidence "
            f"(FakeScore: {fakescore:.3f}). "
        )

        if verdict == "FAKE":
            if s1 > 0.7:
                severity = "significant"
            elif s1 > 0.5:
                severity = "moderate"
            else:
                severity = "minimal"
            # Avoid "likely generated by None" / a dangling clause when no
            # generator name is available.
            origin = f", likely generated by {top_gen}" if top_gen else ""
            return headline + (
                f"Lip-sync scored {s1:.2f} indicating {severity} audio-visual inconsistency. "
                f"Style fingerprinting scored {s2:.2f}{origin}. "
                f"Temporal graph analysis scored {s3:.2f}."
            )

        if max(s1, s2, s3) <= self._THRESHOLD:
            return headline + (
                "All detection modules returned scores below detection threshold, "
                "suggesting authentic audio-visual correspondence."
            )

        # A weighted FakeScore can stay at or below the cut-off while a single
        # module is high; say so instead of claiming every module was low.
        return headline + (
            f"Individual module scores were mixed (lip-sync {s1:.2f}, "
            f"fingerprint {s2:.2f}, temporal graph {s3:.2f}), "
            "but the combined FakeScore did not exceed the decision threshold."
        )