| """ | |
| SatyaCheck β Layer 4: Explainable AI (XAI) | |
| ΰ€Έΰ€€ΰ₯ΰ€― ΰ€ΰ₯ ΰ€ΰ€Ύΰ€ΰ€ | |
| Synthesises results from Layers 1β3 into: | |
| 1. Final risk verdict (TRUSTWORTHY / BE CAREFUL / MISLEADING / FAKE NEWS) | |
| 2. Confidence score (0β100%) | |
| 3. SHAP feature attributions (which factors drove the verdict) | |
| 4. Plain-English explanation (for non-technical users) | |
| 5. Transparent reasoning (key reasons the article was flagged) | |
| 6. Actionable recommendation | |
| Architecture: | |
| Input: Layer1Result, Layer2Result, Layer3Result | |
| Method: Weighted ensemble scoring + SHAP-style feature attribution | |
| Output: Layer4Result | |
| """ | |
import logging
from typing import List, Tuple

import numpy as np

from core.schemas import (
    Layer1Result,
    Layer2Result,
    Layer3Result,
    Layer4Result,
    RiskLevel,
    SHAPFeature,
)
from core.config import settings

# Module-level logger for this layer.
logger = logging.getLogger("satyacheck.layer4")

# =============================================================================
# ENSEMBLE WEIGHTS
# These weights were calibrated on the LIAR and FakeNewsNet benchmark datasets.
# NOTE: the nine weights sum to 1.0, so the weighted feature sum in
# _compute_ensemble_score stays within [0, 1] before the hard boosters.
# =============================================================================
WEIGHT_SEMANTIC_CONFIDENCE = 0.30  # Layer 1 confidence
WEIGHT_STANCE = 0.20               # Headline vs body mismatch
WEIGHT_EMOTIONAL_BIAS = 0.10       # Fear/anger language
WEIGHT_CLICKBAIT = 0.05            # Clickbait headline
WEIGHT_DEEPFAKE = 0.10             # Image manipulation
WEIGHT_IMAGE_REUSE = 0.05          # Old image reused
WEIGHT_DOMAIN_AGE = 0.08           # New/suspicious domain
WEIGHT_CREDIBILITY = 0.07          # Source credibility
WEIGHT_FACT_CHECK = 0.05           # Fact-checker verdict
# =============================================================================
# MAIN LAYER 4 FUNCTION
# =============================================================================
async def run_layer4(
    l1: Layer1Result,
    l2: Layer2Result,
    l3: Layer3Result,
) -> Layer4Result:
    """
    Combine the outputs of Layers 1-3 into the final explainable verdict.

    Args:
        l1: Layer 1 semantic result
        l2: Layer 2 image result
        l3: Layer 3 authority result

    Returns:
        Layer4Result with the verdict, confidence, and all explanations.
    """
    logger.info("βοΈ Layer 4: Computing final verdict...")

    # Normalised [0, 1] feature vector (1 = more suspicious).
    feature_vector = _extract_features(l1, l2, l3)

    # Weighted ensemble score, then the verdict and confidence derived from it.
    fused_score = _compute_ensemble_score(feature_vector)
    verdict = _score_to_risk(fused_score, l1, l2, l3)
    confidence_pct = _compute_confidence(fused_score, l1, l2, l3)

    # Explainability artefacts: attributions plus human-readable text.
    attributions = _compute_shap_features(feature_vector)
    reasons = _build_key_reasons(l1, l2, l3, verdict)
    summary = _build_explanation(verdict, l1, l2, l3)
    process_note = _build_transparency_note(l1, l2, l3)
    advice = _build_recommendation(verdict)

    logger.info(
        f"β Layer 4 done β risk={verdict}, confidence={confidence_pct:.1f}%, "
        f"ensemble_score={fused_score:.3f}"
    )

    return Layer4Result(
        overall_risk=verdict,
        confidence_score=confidence_pct,
        explanation=summary,
        key_reasons=reasons,
        transparency_note=process_note,
        recommendation=advice,
        shap_features=attributions,
    )
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # FEATURE EXTRACTION | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _extract_features( | |
| l1: Layer1Result, | |
| l2: Layer2Result, | |
| l3: Layer3Result, | |
| ) -> dict: | |
| """ | |
| Convert raw layer outputs into normalised [0, 1] feature values. | |
| 0 = completely safe, 1 = completely suspicious. | |
| """ | |
| # Stance alignment: +1 (entailment) β 0 fake, -1 (contradiction) β 1 fake | |
| stance_fake = float(np.clip((1.0 - l1.stance_alignment) / 2.0, 0.0, 1.0)) | |
| # Domain age: newer = more suspicious (cap at 24 months) | |
| domain_age_fake = float(np.clip(1.0 - (l3.domain_age_months / 24.0), 0.0, 1.0)) | |
| # Credibility: lower score = more suspicious | |
| credibility_fake = float(np.clip(1.0 - (l3.credibility_score / 100.0), 0.0, 1.0)) | |
| # Fact check: any FALSE verdict = strong signal | |
| fc_false_count = sum( | |
| 1 for fc in l3.fact_check_results | |
| if "FALSE" in fc.rating.upper() or "INCORRECT" in fc.rating.upper() | |
| ) | |
| fact_check_fake = float(np.clip(fc_false_count / max(len(l3.fact_check_results), 1), 0.0, 1.0)) | |
| return { | |
| "semantic_confidence": l1.confidence_score, # Already 0=safe, 1=fake | |
| "stance": stance_fake, | |
| "emotional_bias": l1.emotional_bias, | |
| "clickbait": l1.clickbait_score, | |
| "deepfake": l2.deepfake_score, | |
| "image_reuse": 1.0 if l2.reverse_search_match else 0.0, | |
| "domain_age": domain_age_fake, | |
| "credibility": credibility_fake, | |
| "fact_check": fact_check_fake, | |
| # Binary boosters | |
| "imposter": 1.0 if l3.is_imposter else 0.0, | |
| "no_ssl": 0.0 if l3.ssl_valid else 0.8, | |
| "metadata_stripped": 0.0 if l2.metadata_integrity else 0.5, | |
| } | |
# =============================================================================
# ENSEMBLE SCORING
# =============================================================================
def _compute_ensemble_score(features: dict) -> float:
    """
    Weighted linear combination of all features.

    Returns a score in [0, 1] where 1 = almost certainly fake.
    """
    # (feature key, ensemble weight) pairs for the linear model, in the same
    # order the weights are declared at module level.
    weighted_terms = (
        ("semantic_confidence", WEIGHT_SEMANTIC_CONFIDENCE),
        ("stance", WEIGHT_STANCE),
        ("emotional_bias", WEIGHT_EMOTIONAL_BIAS),
        ("clickbait", WEIGHT_CLICKBAIT),
        ("deepfake", WEIGHT_DEEPFAKE),
        ("image_reuse", WEIGHT_IMAGE_REUSE),
        ("domain_age", WEIGHT_DOMAIN_AGE),
        ("credibility", WEIGHT_CREDIBILITY),
        ("fact_check", WEIGHT_FACT_CHECK),
    )
    score = sum(features[key] * weight for key, weight in weighted_terms)

    # Hard boosters for definitive signals.
    if features["imposter"] > 0:
        # An imposter site is treated as near-certainly fake.
        score = max(score, 0.85)
    if features["no_ssl"] > 0:
        # Missing SSL only adds a small extra penalty.
        score += 0.05

    return float(np.clip(score, 0.0, 1.0))
def _score_to_risk(
    score: float,
    l1: Layer1Result,
    l2: Layer2Result,
    l3: Layer3Result,
) -> RiskLevel:
    """
    Map the ensemble score onto one of the 4 risk levels.

    Hard rules (imposter site, multiple FALSE fact-checks) override the
    score-based thresholds.
    """
    # Hard rule: impersonating a known outlet is always FAKE NEWS.
    if l3.is_imposter:
        return RiskLevel.FAKE_NEWS

    # Hard rule: two or more fact-checkers rating the story FALSE.
    confirmed_false = sum(
        1 for fc in l3.fact_check_results if "FALSE" in fc.rating.upper()
    )
    if confirmed_false >= 2:
        return RiskLevel.FAKE_NEWS

    # Score-based thresholds, checked from most to least severe.
    if score >= settings.FAKE_CONFIDENCE_THRESHOLD:
        return RiskLevel.FAKE_NEWS
    if score >= 0.55:
        return RiskLevel.MISLEADING
    if score >= settings.SUSPICIOUS_CONFIDENCE_THRESHOLD:
        return RiskLevel.BE_CAREFUL
    return RiskLevel.TRUSTWORTHY
def _compute_confidence(
    ensemble_score: float,
    l1: Layer1Result,
    l2: Layer2Result,
    l3: Layer3Result,
) -> float:
    """
    Compute confidence in the verdict (0-100%).

    Higher confidence when:
    - Multiple layers agree
    - Hard signals are present (fact-checks, imposter, etc.)
    - Ensemble score is far from the decision thresholds

    Returns:
        Confidence percentage, clipped to [50.0, 99.5].
    """
    # Base confidence grows with distance from the nearest decision boundary.
    # (The original also computed the distance to 0.0 and then discarded it;
    # only the real boundaries are considered here.)
    boundaries = (
        settings.SUSPICIOUS_CONFIDENCE_THRESHOLD,
        0.55,  # MISLEADING threshold — keep in sync with _score_to_risk
        settings.FAKE_CONFIDENCE_THRESHOLD,
        1.0,
    )
    margin = min(abs(ensemble_score - t) for t in boundaries)
    base_confidence = 50.0 + margin * 100.0

    # Agreement bonus: fewer distinct per-layer statuses = stronger agreement.
    distinct_statuses = len({l1.status.value, l2.status.value, l3.status.value})
    if distinct_statuses == 1:    # All 3 layers agree
        base_confidence += 15.0
    elif distinct_statuses == 2:  # 2 of 3 agree
        base_confidence += 5.0

    # Hard signal bonuses.
    if l3.is_imposter:
        base_confidence += 10.0
    # BUGFIX: normalise case so ratings like "False"/"false" also count —
    # every other FALSE check in this module uses .upper().
    if any("FALSE" in fc.rating.upper() for fc in l3.fact_check_results):
        base_confidence += 10.0
    if l2.reverse_search_match and not l2.metadata_integrity:
        base_confidence += 5.0

    return float(np.clip(base_confidence, 50.0, 99.5))
# =============================================================================
# SHAP FEATURE ATTRIBUTIONS
# =============================================================================
def _compute_shap_features(features: dict) -> List[SHAPFeature]:
    """
    Compute SHAP-style feature attribution values.

    In production this would use a real SHAP TreeExplainer / KernelExplainer
    on the ensemble model; here the additive attributions are derived directly
    from the weighted feature contributions.

    Positive SHAP value = pushes toward FAKE
    Negative SHAP value = pushes toward TRUSTWORTHY
    """
    # (feature key, ensemble weight, user-facing description) per feature.
    attribution_spec = (
        ("semantic_confidence", WEIGHT_SEMANTIC_CONFIDENCE, "AI reading of the article text"),
        ("stance", WEIGHT_STANCE, "Headline vs article body mismatch"),
        ("emotional_bias", WEIGHT_EMOTIONAL_BIAS, "Fear/anger language used"),
        ("clickbait", WEIGHT_CLICKBAIT, "Clickbait headline patterns"),
        ("deepfake", WEIGHT_DEEPFAKE, "Image manipulation probability"),
        ("image_reuse", WEIGHT_IMAGE_REUSE, "Old image reused for new story"),
        ("domain_age", WEIGHT_DOMAIN_AGE, "How new/unknown the website is"),
        ("credibility", WEIGHT_CREDIBILITY, "Website trust rating"),
        ("fact_check", WEIGHT_FACT_CHECK, "Fact-checker verdicts"),
    )

    attributions: List[SHAPFeature] = []
    for key, weight, description in attribution_spec:
        value = features.get(key, 0.0)
        # Contribution above/below the neutral baseline of 0.5, scaled so a
        # feature at 0 or 1 contributes exactly -weight or +weight.
        shap_val = (value - 0.5) * weight * 2.0
        attributions.append(SHAPFeature(
            feature=key.replace("_", " ").title(),
            value=round(shap_val, 4),
            description=description,
        ))

    # Most impactful first; surface only the top 8 features.
    return sorted(attributions, key=lambda f: abs(f.value), reverse=True)[:8]
# =============================================================================
# PLAIN-ENGLISH OUTPUTS
# =============================================================================
def _build_key_reasons(
    l1: Layer1Result,
    l2: Layer2Result,
    l3: Layer3Result,
    risk: RiskLevel,
) -> List[str]:
    """
    Build plain-language bullet points (max 7) justifying the verdict.

    Each bullet is written for an average Indian news reader and is prefixed
    with a severity marker matching the overall risk level.
    """
    reasons: List[str] = []
    add = reasons.append
    prefix = {
        RiskLevel.TRUSTWORTHY: "β ",
        RiskLevel.BE_CAREFUL: "β οΈ",
        RiskLevel.MISLEADING: "πΆ",
        RiskLevel.FAKE_NEWS: "π¨",
    }[risk]

    # -- Layer 1: text analysis ----------------------------------------------
    if l1.stance_alignment > 0.6:
        add("β The headline accurately describes what the article actually says")
    elif l1.stance_alignment > 0.2:
        add("β οΈ The headline partially matches the article β some details may be exaggerated")
    else:
        add("π¨ The headline does NOT match the article body β this is a misleading headline")

    if l1.emotional_bias > settings.EMOTIONAL_BIAS_THRESHOLD:
        add(f"{prefix} Uses extreme fear-based or anger-provoking language to manipulate readers")
    elif l1.emotional_bias < 0.25:
        add("β Uses calm, factual language β not trying to scare or anger you")

    if l1.clickbait_score > settings.CLICKBAIT_THRESHOLD:
        add(f"{prefix} Headline uses clickbait tactics designed to get you to click without thinking")

    # -- Layer 2: image forensics --------------------------------------------
    if l2.image_found:
        if l2.reverse_search_match:
            add(f"π¨ The image is {l2.image_age or 'years'} old and being falsely used for this new story")
        elif l2.deepfake_score < 0.2:
            add("β Images appear to be original and unedited")
        if not l2.metadata_integrity:
            add(f"{prefix} Image metadata was deleted β a common trick to hide where the image really came from")

    # -- Layer 3: source authority -------------------------------------------
    if l3.domain_age_months < 3:
        add(f"π¨ This website was created only {l3.domain_age} ago β brand new sites spreading news are suspicious")
    elif l3.domain_age_months > 36:
        add(f"β This website has been established for {l3.domain_age} β a sign of credibility")

    if l3.is_imposter:
        add(f"π¨ This website is pretending to be '{l3.mimicked_domain}' β a well-known news site. This is a common fake news trick")
    if not l3.ssl_valid:
        add(f"{prefix} This website has no security certificate β real news organisations always have HTTPS")

    false_checks = [fc for fc in l3.fact_check_results if "FALSE" in fc.rating.upper()]
    if false_checks:
        named = ", ".join(fc.publisher for fc in false_checks[:2])
        add(f"π¨ Fact-checkers ({named}) have already confirmed this story is FALSE")
    elif not l3.fact_check_results and l3.credibility_score > 80:
        add(f"β No fact-checker has disputed this story from {l3.credibility_score}/100 rated source")

    # Cap the list at 7 bullets.
    return reasons[:7]
def _build_explanation(
    risk: RiskLevel,
    l1: Layer1Result,
    l2: Layer2Result,
    l3: Layer3Result,
) -> str:
    """
    Build a 2-3 sentence plain-English explanation of the verdict.

    Sentences are assembled only from signals that actually fired, so the
    explanation always reflects the evidence behind the risk level.
    """
    if risk == RiskLevel.TRUSTWORTHY:
        parts = [
            f"This article is from a well-established source with a credibility score of {l3.credibility_score}/100."
        ]
        if l1.stance_alignment > 0.6:
            parts.append("The headline accurately matches what the article body actually says.")
        if not l2.reverse_search_match and l2.image_found:
            parts.append("The images appear to be original and have not been found in older, unrelated stories.")
        if not l3.fact_check_results:
            parts.append("No fact-checker has disputed any claim in this story.")
        return " ".join(parts)

    elif risk == RiskLevel.BE_CAREFUL:
        parts = [
            "This article makes claims that could not be fully verified."
        ]
        if l3.domain_age_months < 12:
            parts.append(f"The website is only {l3.domain_age} old and is not a well-known source.")
        if l1.emotional_bias > 0.35:
            parts.append("The article uses language that is designed to provoke emotion rather than inform you.")
        if l3.fact_check_results:
            parts.append("Some claims in this article have been marked as unverified by fact-checkers.")
        parts.append("We recommend checking this story on a trusted news outlet before believing or sharing it.")
        return " ".join(parts)

    elif risk == RiskLevel.MISLEADING:
        parts = []
        if l1.stance_alignment < 0.3:
            parts.append("The headline is misleading β it does not accurately represent what the article actually says.")
        if l1.emotional_bias > 0.6:
            parts.append("The article uses extreme emotional language designed to make you react without thinking.")
        if l3.domain_age_months < 12:
            parts.append(f"The source website is only {l3.domain_age} old and has a low credibility score of {l3.credibility_score}/100.")
        if not parts:
            parts.append("Multiple warning signs were detected across this article β it is likely misleading.")
        return " ".join(parts)

    else:  # FAKE_NEWS
        parts = ["This article shows strong signs of being deliberate fake news."]
        if l3.is_imposter:
            # BUGFIX: the original sentence called mimicked_domain "a fake
            # site", but mimicked_domain is the LEGITIMATE outlet being
            # impersonated (see _build_key_reasons) — the wording inverted
            # the meaning.
            parts.append(f"This site is a fake impersonating the real news outlet '{l3.mimicked_domain}'.")
        if l2.reverse_search_match:
            parts.append(f"The image used is {l2.image_age or 'years'} old and was stolen from an unrelated story.")
        false_checks = [fc for fc in l3.fact_check_results if "FALSE" in fc.rating.upper()]
        if false_checks:
            parts.append(f"The main claims have been confirmed FALSE by {false_checks[0].publisher}.")
        if l1.emotional_bias > 0.8:
            parts.append("It uses extreme fear-mongering language β a hallmark of disinformation.")
        return " ".join(parts)
| def _build_transparency_note( | |
| l1: Layer1Result, | |
| l2: Layer2Result, | |
| l3: Layer3Result, | |
| ) -> str: | |
| """ | |
| A brief note explaining HOW SatyaCheck arrived at its conclusion. | |
| This is the 'glass box' XAI output β letting the user understand the process. | |
| """ | |
| checks_done = [] | |
| checks_done.append("read and understood the full article text") | |
| if l2.image_found: | |
| checks_done.append("analysed the article's images for tampering and recycled content") | |
| checks_done.append(f"checked the website '{l3.domain_age}' domain for age, security, and credibility") | |
| if l3.fact_check_results: | |
| checks_done.append(f"found {len(l3.fact_check_results)} relevant fact-check report(s)") | |
| checks_str = ", ".join(checks_done[:-1]) + f", and {checks_done[-1]}" | |
| return ( | |
| f"SatyaCheck's AI {checks_str}. " | |
| f"The verdict is based on {len([l1, l2, l3])} independent checks " | |
| f"working together β not just one signal." | |
| ) | |
def _build_recommendation(risk: RiskLevel) -> str:
    """Return the final one-line actionable recommendation for the user."""
    # Verdict -> advice lookup; an unknown risk level raises KeyError, which
    # would indicate a programming error upstream.
    advice_by_risk = {
        RiskLevel.TRUSTWORTHY: "Safe to read and share with others.",
        RiskLevel.BE_CAREFUL: "Do not share until you verify this with a trusted news source like NDTV, The Hindu, or BBC India.",
        RiskLevel.MISLEADING: "Do not share this. The headline is misleading and likely does not reflect the truth.",
        RiskLevel.FAKE_NEWS: "Do NOT share this. This is fake news designed to mislead and divide people.",
    }
    return advice_by_risk[risk]