"""M5 Explain — NVIDIA NIM Llama-3.1-8B-Instruct.""" from __future__ import annotations import os from openai import OpenAI class ExplainModule: """NVIDIA NIM free tier: ~40 req/min.""" def __init__(self): self.client = OpenAI( api_key=os.environ.get("NVIDIA_API_KEY", ""), base_url="https://integrate.api.nvidia.com/v1", ) self.model = "meta/llama-3.1-8b-instruct" def explain( self, fakescore: float, s1: float, s2: float, s3: float, weights: dict, attribution: dict, segments: list, top_generator: str, ) -> str: verdict = "FAKE" if fakescore > 0.5 else "REAL" conf = ( "high" if abs(fakescore - 0.5) > 0.3 else "moderate" if abs(fakescore - 0.5) > 0.15 else "low" ) seg_text = "" if segments: seg_text = "Flagged timestamps: " + ", ".join( f"{s['time']}s (score={s['score']})" for s in segments[:5] ) attr_text = "" if attribution: top3 = sorted(attribution.items(), key=lambda x: -x[1])[:3] attr_text = "Top generators: " + ", ".join( f"{n}: {p * 100:.1f}%" for n, p in top3 ) prompt = f"""You are a forensic AI analyst. Analyze these deepfake detection results. Be specific. Results: - Verdict: {verdict} (FakeScore: {fakescore:.3f}, confidence: {conf}) - Lip-Sync (M1): {s1:.3f} (weight: {weights.get('lip_sync', 'N/A')}) - Fingerprint (M2): {s2:.3f} (weight: {weights.get('fingerprint', 'N/A')}) - Temporal-GNN (M3): {s3:.3f} (weight: {weights.get('graph_gnn', 'N/A')}) {seg_text} {attr_text} - Most likely generator: {top_generator} Write 3-5 sentences referencing specific scores and timestamps.""" try: response = self.client.chat.completions.create( model=self.model, messages=[ {"role": "system", "content": "You are a forensic deepfake analyst. Be precise and concise."}, {"role": "user", "content": prompt}, ], max_tokens=300, temperature=0.3, ) return response.choices[0].message.content.strip() except Exception: return self._fallback(verdict, fakescore, s1, s2, s3, top_generator, conf) def _fallback(self, verdict, fakescore, s1, s2, s3, top_gen, conf) -> str: if verdict == "FAKE": return ( f"Video classified as {verdict} with {conf} confidence (FakeScore: {fakescore:.3f}). " f"Lip-sync scored {s1:.2f} indicating " f"{'significant' if s1 > 0.7 else 'moderate' if s1 > 0.5 else 'minimal'} audio-visual inconsistency. " f"Style fingerprinting scored {s2:.2f}, likely generated by {top_gen}. " f"Temporal graph analysis scored {s3:.2f}." ) return ( f"Video classified as {verdict} with {conf} confidence (FakeScore: {fakescore:.3f}). " "All detection modules returned scores below detection threshold, " "suggesting authentic audio-visual correspondence." )