"""Fact-Checker CLI: interactive article verification tool.

Uses the unified FactChecker engine to analyze articles/claims through both
internal ML validation and external source verification.

Usage:
    python check_article.py
"""

import logging
import os
import warnings

# Switch off noisy progress bars and warnings.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "false",
        "TRANSFORMERS_VERBOSITY": "error",
        "HF_HUB_DISABLE_PROGRESS_BARS": "1",
    }
)
warnings.filterwarnings("ignore")

# HTTP / translation / ML libraries log verbosely: let only errors through.
for _noisy_logger in (
    "httpcore",
    "httpx",
    "urllib3",
    "urllib3.connectionpool",
    "deep_translator",
    "sentence_transformers",
    "transformers",
    "huggingface_hub",
    "filelock",
):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
logging.getLogger("").setLevel(logging.WARNING)  # root logger

# Background threads of httpcore / deep_translator print messages such as
# 'Backend with N concurrent workers.' asynchronously, after translation has
# completed. Redirecting sys.stdout cannot catch them (the message fires after
# the suppressor has restored stdout), so builtins.print is patched directly.
import builtins as _builtins
import re

_orig_print = _builtins.print

# Fragments that identify background-thread noise from the HTTP libraries.
_SUPPRESSED_PRINT_FRAGMENTS = (
    "concurrent workers",
    "Backend with",
    "httpcore",
    "CONNECT",
)


def _filtered_print(*args, **kwargs):
    """Forward to the real ``print`` unless the message matches a noise pattern.

    The positional arguments are stringified and joined with single spaces;
    if that text contains any suppressed fragment (case-sensitive) the call is
    dropped and nothing is written. Because this function replaces
    ``builtins.print``, the filter also applies to this module's own output.
    """
    msg = " ".join(str(a) for a in args)
    if any(fragment in msg for fragment in _SUPPRESSED_PRINT_FRAGMENTS):
        return
    _orig_print(*args, **kwargs)


_builtins.print = _filtered_print

from checker.fact_checker import FactChecker

# ── Display Helpers ──

_RULE = "=" * 60

# Lower-cased outlet names treated as reliable by explain_sources().
_RELIABLE_SOURCES = frozenset(
    {
        "inquirer",
        "philstar",
        "manila bulletin",
        "abs-cbn",
        "cnn philippines",
        "gma",
        "rappler",
        "sunstar",
        "businessmirror",
        "pna",
        "philippine news agency",
        "bbc",
        "reuters",
        "ap",
        "associated press",
        "new york times",
        "the guardian",
        # Cebuano / Visayas regional outlets
        "superbalita",
        "banat",
        "cebu daily news",
        "sunstar cebu",
        "the freeman",
        "mb cebu",
        "sugbo",
        "visayas",
        "mindanao daily",
        "sunstar davao",
        "cdn digital",
        # Run-together spellings of the short names above, so that they stay
        # recognised now that short names must match as whole words.
        "apnews",
        "gmanetwork",
        "gmanews",
    }
)

# Names this short occur inside unrelated words ("ap" in "Japan", "gma" in
# "Enigma"), so they only count when they stand alone as a word.
_SHORT_NAME_MAX_LEN = 3
_RELIABLE_LONG_NAMES = tuple(
    sorted(name for name in _RELIABLE_SOURCES if len(name) > _SHORT_NAME_MAX_LEN)
)
_RELIABLE_SHORT_NAME_RE = re.compile(
    r"(?<!\w)(?:%s)(?!\w)"
    % "|".join(
        re.escape(name)
        for name in sorted(_RELIABLE_SOURCES)
        if len(name) <= _SHORT_NAME_MAX_LEN
    )
)

_PATTERN_VERDICT_ICONS = {
    "FOLLOWS PATTERN": "✅",
    "MINOR DEVIATION": "🟡",
    "SIGNIFICANT DEVIATION": "🟠",
}

_PATTERN_VERDICT_NOTES = {
    "FOLLOWS PATTERN": (
        " ➤ The article's claims are consistent with what reputable sources report."
    ),
    "MINOR DEVIATION": (
        " ➤ The article mostly follows reputable reporting but includes "
        "some claims that could not be corroborated externally. Verify those claims."
    ),
    "SIGNIFICANT DEVIATION": (
        " ➤ BIAS SIGNAL: The article adds substantial claims not found in "
        "any reputable external source. These additions are the likely source of bias."
    ),
}

_CLAIM_VERDICT_ICONS = {"CONFIRMED": "✅", "UNVERIFIED": "⚠️ ", "CONTRADICTED": "❌"}


def _print_header(*title_lines):
    """Print a 60-character rule, the given title lines, and a closing rule."""
    print(_RULE)
    for title in title_lines:
        print(title)
    print(_RULE)


def _bar(filled, width=20, full="█", blank="░"):
    """Return a fixed-width meter with ``filled`` cells set, clamped to 0..width."""
    filled = max(0, min(width, filled))
    return full * filled + blank * (width - filled)


def _is_reliable_source(source):
    """Return True when ``source`` names one of the known reliable outlets.

    Matching is case-insensitive. Longer outlet names match as substrings;
    names of at most three characters must appear as a standalone word.
    """
    src = str(source or "").lower()
    if any(name in src for name in _RELIABLE_LONG_NAMES):
        return True
    return _RELIABLE_SHORT_NAME_RE.search(src) is not None


def explain_bias(bias):
    """Generate a human-readable explanation of the bias analysis result.

    Args:
        bias: mapping with the bias analysis fields. Every field is read with
            a default, so missing keys are tolerated.

    Returns:
        A list of display lines (strings), in the order they should be printed.
    """
    lines = []
    leaning = bias.get("leaning", "Centrist")
    is_biased = bias.get("is_biased", False)
    subjectivity = bias.get("subjectivity", 0.0)
    subj_flag = bias.get("subjectivity_flag", False)
    vader_compound = bias.get("vader_compound", 0.0)
    vader_biased = bias.get("vader_biased", False)
    unsourced = bias.get("unsourced_claim", False)
    # "details" may be missing or None; both are treated as "no details".
    d = bias.get("details") or {}
    left_kw = d.get("left_keywords", [])
    right_kw = d.get("right_keywords", [])
    sensational_kw = d.get("sensational_keywords", [])
    opinion_kw = d.get("opinion_markers", [])
    unsourced_sig = d.get("unsourced_signals", [])
    figure_hits = d.get("figure_hits", {})

    # Leaning explanation
    LEANING_DESCRIPTIONS = {
        "Conservative": (
            "conservative / Duterte-faction framing "
            "(pro-Duterte language or anti-Marcos/anti-Leni signals detected)"
        ),
        "Admin/Right": ("pro-administration / Marcos-faction framing detected"),
        "Left-leaning": (
            "left-aligned or Liberal-bloc framing "
            "(pro-Leni language, human rights / EJK discourse detected)"
        ),
        "Right-leaning": ("pro-administration or nationalist framing"),
    }
    if leaning in LEANING_DESCRIPTIONS:
        reason = LEANING_DESCRIPTIONS[leaning]
        if right_kw and leaning in ("Conservative", "Admin/Right", "Right-leaning"):
            reason += f" | keywords: {', '.join(right_kw[:4])}"
        elif left_kw and leaning == "Left-leaning":
            reason += f" | keywords: {', '.join(left_kw[:4])}"
        lines.append(f" ➤ Labeled {leaning.upper()} because: {reason}.")
    else:
        # Any leaning without a description is reported as centrist.
        lines.append(
            " ➤ Labeled CENTRIST: no strong political keyword lean detected."
            " This does not mean the article is neutral — see subjectivity and tone below."
        )

    # Figure-based signals
    if figure_hits:
        for faction, kws in figure_hits.items():
            if kws:
                label = faction.replace("_", " ").title()
                lines.append(f" ➤ {label} signals: {', '.join(kws[:5])}")

    # Unsourced claim flag
    if unsourced:
        sig_preview = ", ".join(unsourced_sig[:4]) if unsourced_sig else ""
        lines.append(
            " ⚠️ UNSOURCED CLAIM DETECTED: Article contains blind-item / chismis-style "
            "language with few or no named attributions."
            + (f" Signals: [{sig_preview}]" if sig_preview else "")
        )

    # Bias flag explanation
    bias_reasons = []
    if leaning != "Centrist":
        bias_reasons.append(f"political leaning detected ({leaning})")
    if subj_flag:
        bias_reasons.append(f"high subjectivity ({subjectivity:.2f} > 0.50 threshold)")
    if vader_biased:
        direction = "negative" if vader_compound < 0 else "positive"
        bias_reasons.append(
            f"strong emotional tone (VADER {vader_compound:+.3f} — strongly {direction})"
        )
    if sensational_kw:
        bias_reasons.append(f"sensational language ({', '.join(sensational_kw[:3])})")
    if opinion_kw:
        bias_reasons.append(
            f"first-person opinion markers ({', '.join(opinion_kw[:3])})"
        )
    if unsourced:
        bias_reasons.append("unsourced / blind-item style claims")

    if is_biased and bias_reasons:
        lines.append(f" ➤ BIASED because: {'; '.join(bias_reasons)}.")
    elif not is_biased:
        lines.append(
            " ➤ NOT flagged as biased: subjectivity within normal range, "
            "emotional tone moderate, no strong political keywords detected."
        )
    return lines


def explain_sources(db_results, web_results, top_score):
    """Generate a brief reliability note about external sources found.

    Args:
        db_results: list of result dicts from the database search; each dict's
            "source" entry is read.
        web_results: list of result dicts from the web search, same shape.
        top_score: best similarity score; 0.55 or higher is reported as a
            strong match.

    Returns:
        A list of display lines. Distinct source names are listed in the order
        they first appear (database results first), so output is stable
        between runs.
    """
    lines = []
    reliable = []
    other = []
    seen = set()
    for r in db_results + web_results:
        # A missing, None or empty source is labelled rather than crashing.
        source = str(r.get("source") or "Unknown source")
        if source in seen:
            continue
        seen.add(source)
        if _is_reliable_source(source):
            reliable.append(source)
        else:
            other.append(source)

    if reliable:
        lines.append(
            f" ➤ {len(reliable)} match(es) from known reliable source(s): "
            f"{', '.join(reliable[:5])}."
        )
        if top_score >= 0.55:
            lines.append(
                " Strong similarity match from a reliable source — confidence in this story is higher."
            )
        else:
            lines.append(
                " Partial match found — the story exists, but verify details independently."
            )
    if other:
        lines.append(
            f" ➤ {len(other)} match(es) from other/unclassified source(s): "
            f"{', '.join(other[:5])}. Treat with caution."
        )
    if not reliable and not other:
        lines.append(" ➤ No external sources found. Cannot assess source reliability.")
    return lines


def display_pattern_deviation(pd):
    """Display the Pattern Deviation Check section.

    Args:
        pd: mapping describing the pattern-deviation result. The keys read are
            "verdict", "deviation_score", "extra_claims",
            "corroborated_claims", "all_source_titles" (iterated as
            ``(title, source)`` pairs) and "reliable_source_count".
    """
    _print_header(
        " PATTERN DEVIATION CHECK",
        " Does this article follow the reputable news pattern?",
    )
    verdict = pd.get("verdict", "")
    deviation_score = pd.get("deviation_score", 0.0)
    extra_claims = pd.get("extra_claims", [])
    corroborated = pd.get("corroborated_claims", [])
    all_titles = pd.get("all_source_titles", [])
    reliable_count = pd.get("reliable_source_count", 0)

    if verdict == "NO EXTERNAL SOURCES":
        print(" No external sources available for pattern comparison.")
        print()
        return

    # External pattern preview
    print(f" Based on {len(all_titles)} external source(s)")
    if reliable_count > 0:
        print(f" ({reliable_count} from known reliable sources):")
    else:
        print(" (no known reliable sources):")
    for title, source in all_titles[:4]:
        src_label = f" [{source}]" if source else ""
        print(f' • "{title[:90]}"{src_label}')
    if len(all_titles) > 4:
        print(f" • …and {len(all_titles) - 4} more")
    print()

    # Corroborated claims
    if corroborated:
        print(
            f" ✔ Corroborated ({len(corroborated)} sentence(s) align with external sources):"
        )
        for s in corroborated[:2]:
            ellipsis = "..." if len(s) > 100 else ""
            print(f' → "{s[:100]}{ellipsis}"')
        print()

    # Extra claims (deviations)
    if extra_claims:
        print(
            f" ⚠️ Extra claims not found in reputable sources ({len(extra_claims)} sentence(s)):"
        )
        for s in extra_claims[:4]:
            ellipsis = "..." if len(s) > 100 else ""
            print(f' → "{s[:100]}{ellipsis}"')
        if len(extra_claims) > 4:
            print(f" …and {len(extra_claims) - 4} more")
        print()
    else:
        print(" ✔ No extra claims detected — article stays within the reported facts.")
        print()

    # Deviation score bar; _bar clamps so an out-of-range score cannot
    # produce a meter wider or narrower than 20 cells.
    bar = _bar(int(deviation_score * 20))
    print(f" Deviation score: {deviation_score:.0%} [{bar}]")

    # Verdict with icon
    icon = _PATTERN_VERDICT_ICONS.get(verdict, "⚠️")
    print(f" Verdict: {icon} {verdict}")
    note = _PATTERN_VERDICT_NOTES.get(verdict)
    if note is not None:
        print(note)
    print()


def get_user_input():
    """Read multi-line input from user (2 blank lines or 'done' to stop).

    Reading also stops at end of input. A single blank line between two
    non-blank lines is preserved.

    Returns:
        The collected lines joined with newlines, stripped of leading and
        trailing whitespace.
    """
    _print_header(" FACT-CHECKER (External + Internal ML Model)")
    print()
    print("Paste your article or claim below.")
    print("When done, press Enter on an empty line twice, or type 'done':")
    print()

    lines = []
    empty_count = 0
    while True:
        try:
            line = input()
        except EOFError:
            break
        if line.strip().lower() == "done":
            break
        if line == "":
            empty_count += 1
            if empty_count >= 2:
                break
            continue
        # A lone blank line seen just before this text was a paragraph break.
        if empty_count == 1:
            lines.append("")
        empty_count = 0
        lines.append(line)
    return "\n".join(lines).strip()


def _show_validation(validation, bias):
    """Print the ML news-validation verdict, confidence bar and probabilities."""
    _print_header(" INTERNAL CHECK — NEWS VALIDATION")
    confidence = validation["confidence"]
    v_bar = _bar(int(confidence * 20), full="#", blank="-")
    print(f" Verdict: {validation['verdict']}")
    print(f" Confidence: {confidence:.1%} [{v_bar}] (raw ML score)")
    print(
        f" Real: {validation['probabilities'].get('Real', 0):.1%} | "
        f"Fake: {validation['probabilities'].get('Fake', 0):.1%}"
    )
    # Show obfuscation penalty note when applicable
    if bias.get("obfuscated_text"):
        adjusted = max(0.0, confidence - 0.15)
        print(
            f" ⚠️ Confidence adjusted to {adjusted:.1%} in final verdict "
            "(−15% obfuscation penalty — real journalism is proofread)"
        )
    print()


def _show_time_orientation(time_orient):
    """Print the past/present/future focus shares, if any were supplied."""
    if not time_orient:
        return
    print(" Time Orientation:")
    for label, key in (
        ("Past", "past_focus"),
        ("Present", "present_focus"),
        ("Future", "future_focus"),
    ):
        share = time_orient.get(key, 0)
        # One block per percentage point.
        print(f" {label}: {share:.2%} {'█' * int(share * 100)}")
    print()


def _show_bias(bias):
    """Print the bias-analysis section followed by the bias explanation."""
    _print_header(" INTERNAL CHECK — BIAS ANALYSIS")
    details = bias.get("details") or {}
    centrist = bias["leaning"] == "Centrist"
    obfuscated = bias.get("obfuscated_text")
    unsourced = bias.get("unsourced_claim")

    # Clarify leaning when Centrist but flagged by non-political signals
    leaning_display = bias["leaning"]
    if centrist and obfuscated:
        leaning_display = "Centrist (Obfuscated Social Media Style)"
    elif centrist and unsourced:
        leaning_display = "Centrist (Unsourced Claim)"
    print(f" Leaning: {leaning_display}")

    # Biased line — explain when reason is NOT political leaning
    if bias["is_biased"] and centrist:
        if obfuscated:
            print(" Biased: Yes (obfuscated text — real news is always proofread)")
        elif unsourced:
            print(" Biased: Yes (unsourced claims detected)")
        else:
            print(" Biased: Yes (high subjectivity or emotional tone)")
    else:
        print(f" Biased: {'Yes' if bias['is_biased'] else 'No'}")

    # Unsourced claim — show prominently
    if unsourced:
        print(" ⚠️ Unsourced: YES — blind item / chismis-style language detected")
    else:
        print(" Unsourced: No named-source issues detected")

    # Obfuscation — intentional misspelling / character substitution
    if obfuscated:
        sigs = details.get("obfuscation_signals", [])
        sig_str = f" [{', '.join(sigs[:3])}]" if sigs else ""
        print(
            f" ⚠️ Obfuscated: YES — intentional misspelling / char substitution detected{sig_str}"
        )
    else:
        print(" Obfuscated: No character substitution detected")

    subj_status = (
        "EXCEEDS THRESHOLD" if bias.get("subjectivity_flag") else "Within threshold"
    )
    print(f" Subjectivity: {bias.get('subjectivity', 0):.2f} ({subj_status})")

    vader_compound = bias.get("vader_compound", 0.0)
    if vader_compound <= -0.5:
        vader_label = "STRONGLY NEGATIVE"
    elif vader_compound >= 0.5:
        vader_label = "STRONGLY POSITIVE"
    else:
        vader_label = "Neutral/Moderate"
    print(f" Emotional tone (VADER): {vader_compound:+.3f} — {vader_label}")

    # ── New accuracy signals ──
    # Sarcasm detection (Rec 1)
    if bias.get("sarcasm_detected"):
        sarcasm_hits = details.get("sarcasm_hits", [])
        preview = f" ({sarcasm_hits[0]})" if sarcasm_hits else ""
        print(f" ⚠️ Sarcasm/Irony: DETECTED — positive adjective near negative fact{preview}")
        print(" ↳ VADER positive-score penalty suppressed (article may be satirical)")

    # Filipino lexicon (Rec 2)
    fil_score = bias.get("filipino_lexicon_score", 0.0)
    if abs(fil_score) >= 0.10:
        direction = "negative (attack/derogatory terms)" if fil_score < 0 else "positive"
        print(f" Filipino lexicon: {fil_score:+.2f} — {direction}")

    # Vlog/platform detection (Rec 4)
    vlog_count = bias.get("vlog_pattern_count", 0)
    platform_boost = bias.get("platform_boost", 0)
    if platform_boost > 0:
        print(f" ⚠️ Vlog/Platform: {vlog_count} fingerprint(s) detected — sensationalism +{platform_boost}")

    if details:
        if details.get("left_keywords"):
            print(f" Left keywords: {', '.join(details['left_keywords'])}")
        if details.get("right_keywords"):
            print(f" Right keywords: {', '.join(details['right_keywords'])}")
        if details.get("sensational_keywords"):
            print(f" Sensationalism: {', '.join(details['sensational_keywords'])}")
        if details.get("opinion_markers"):
            print(f" Opinion markers: {', '.join(details['opinion_markers'])}")
        if details.get("unsourced_signals"):
            print(f" Chismis signals: {', '.join(details['unsourced_signals'][:5])}")
        # SVO hits (Rec 3) — show only when found
        svo_hits = details.get("svo_hits", [])
        if svo_hits:
            svo_preview = "; ".join(f'"{n}" → "{v}"' for n, v in svo_hits[:3])
            print(f" SVO bias signal: {svo_preview}")

    # Bias explanation
    print()
    print(" BIAS EXPLANATION:")
    for line in explain_bias(bias):
        print(line)
    print()


def _show_structure(structure):
    """Print the inverted-pyramid structure analysis when a score is available."""
    if not structure or structure.get("formalism_score", 0) is None:
        return
    lang_label = structure.get("language", "unknown").capitalize()
    _print_header(f" STRUCTURE ANALYSIS — Inverted Pyramid ({lang_label})")

    # 5W+1H lead breakdown
    lead = structure.get("lead_5w1h", {})
    dims = [
        ("who", "Who"),
        ("what", "What"),
        ("where", "Where"),
        ("when", "When"),
        ("why", "Why"),
        ("how", "How"),
    ]
    checks = " ".join(
        f"{'✅' if lead.get(key) else '❌'} {label}" for key, label in dims
    )
    completeness = structure.get("lead_completeness", 0)
    print(f" Lead (5W+1H): {checks}")
    print(f" Lead score: {completeness}/6 elements found in opening paragraph")

    # Attribution
    attr_count = structure.get("attribution_count", 0)
    if structure.get("has_attribution"):
        print(f" Attribution: ✅ {attr_count} named-source signal(s) found")
    else:
        print(" Attribution: ❌ No named-source attribution detected")

    # Transitions
    trans_count = structure.get("transition_count", 0)
    if structure.get("has_transitions"):
        print(f" Transitions: ✅ {trans_count} formal transition(s) found")
    else:
        print(" Transitions: ❌ No formal transitions detected")

    # Formalism score bar: one cell per 5 points. int() keeps a float score
    # from breaking the string repetition; _bar clamps scores outside 0..100.
    score = structure.get("formalism_score", 0)
    bar = _bar(int(score) // 5)
    if score >= 70:
        grade = "📰 Journalistic"
    elif score >= 45:
        grade = "📋 Partial"
    elif score >= 20:
        grade = "📱 Informal"
    else:
        grade = "⚠️ Unstructured"
    print(f" Formalism score: {score}/100 [{bar}] {grade}")
    print(f" ➤ {structure.get('assessment', '')}")
    print()


def _show_database_results(db_results, top_score):
    """Print the local-database matches with a verdict derived from top_score."""
    if not db_results:
        print(" No matches found in local database.\n")
        return
    if top_score >= 0.55:
        db_verdict = "VERIFIED — Strong match found in news database"
    elif top_score >= 0.40:
        db_verdict = "RELATED COVERAGE FOUND — Partial match in database"
    else:
        db_verdict = "WEAK MATCH — Loosely related articles found"
    _print_header(
        f" DATABASE: {db_verdict}",
        f" Top score: {top_score} "
        f"(semantic: {db_results[0]['semantic']}, keyword: {db_results[0]['keyword']})",
    )
    print(f"\nFound {len(db_results)} related article(s) in database:\n")
    for i, r in enumerate(db_results, 1):
        score_bar = _bar(int(r["similarity"] * 20))
        print(f" {i}. [{r['similarity']:.0%}] {score_bar}")
        print(f" {r['title']}")
        print(
            f" Source: {r['source']} | Semantic: {r['semantic']:.0%} | "
            f"Keyword: {r['keyword']:.0%}"
        )
        print(f" URL: {r['url']}")
        print()


def _show_web_results(web_results, final_verdict):
    """Print the Google News matches, or a note that none were found."""
    if not web_results:
        print(" No results found on Google News either.\n")
        return
    if final_verdict == "VERIFIED":
        _print_header(" WEB SEARCH: Additional coverage from Google News")
    else:
        _print_header(" WEB SEARCH: Coverage found online (not in local DB)")
    print(f"\nFound {len(web_results)} article(s) from Google News:\n")
    for i, r in enumerate(web_results, 1):
        print(f" {i}. {r['title']}")
        print(f" Source: {r['source']} | Published: {r['published']}")
        print(f" URL: {r['url']}")
        print()


def _show_lime(lime_explanation, validation):
    """Print the top LIME word weights pushing toward FAKE and toward REAL."""
    if not lime_explanation:
        return
    verdict = validation["verdict"]
    confidence = validation["confidence"]
    _print_header(
        f" LIME EXPLANATION — Why this was flagged as {verdict} ({confidence:.1%})"
    )
    fake_words = [(w, weight) for w, weight in lime_explanation if weight > 0]
    real_words = [(w, weight) for w, weight in lime_explanation if weight < 0]
    # Scale bars relative to max absolute weight. It can only be zero when
    # both lists above are empty, so the divisions below are safe.
    max_weight = max(abs(w) for _, w in lime_explanation)

    if fake_words:
        print(" Words pushing toward FAKE:")
        for word, weight in sorted(fake_words, key=lambda x: -x[1])[:5]:
            bar = _bar(int((weight / max_weight) * 12), width=12)
            print(f" {word:<20} [{bar}] +{weight:.3f}")
        print()
    if real_words:
        print(" Words pushing toward REAL:")
        for word, weight in sorted(real_words, key=lambda x: x[1])[:5]:
            bar = _bar(int((abs(weight) / max_weight) * 12), width=12)
            print(f" {word:<20} [{bar}] {weight:.3f}")
        print()


def _show_claim_verification(claim_verification):
    """Print per-claim verification results, numeric claims before phrase claims."""
    if not claim_verification:
        return
    _print_header(" CLAIM VERIFICATION — Per-Claim Check")
    numeric = [cv for cv in claim_verification if cv.get("type") == "numeric"]
    phrase = [cv for cv in claim_verification if cv.get("type") == "phrase"]

    if numeric:
        print(f" Numeric claims ({len(numeric)}):\n")
        for cv in numeric:
            icon = _CLAIM_VERDICT_ICONS.get(cv["verdict"], "?")
            claimed_str = f"{cv['claimed']:,}"
            found = cv.get("found")
            found_note = ""
            # Compare against None: a source figure of 0 is a real value and
            # must still be shown.
            if found is not None:
                if cv["verdict"] == "CONFIRMED":
                    found_note = f" (source says: {found:,})"
                elif cv["verdict"] == "CONTRADICTED":
                    found_note = f" (source says: {found:,} instead)"
            print(f" {icon} {cv['entity']}: claimed {claimed_str}{found_note}")
            if cv.get("source"):
                print(f" ↳ {cv['source'][:80]}")
        print()

    if phrase:
        print(f" Phrase claims ({len(phrase)}):\n")
        for cv in phrase:
            icon = _CLAIM_VERDICT_ICONS.get(cv["verdict"], "?")
            print(f' {icon} "{cv["claim_text"]}"')
            if cv.get("source"):
                print(f" ↳ {cv['source'][:80]}")
        print()


def _show_external_reports(counter_facts, extra_claims):
    """Print what external sources report and the article's unsupported claims."""
    if not (counter_facts or extra_claims):
        return
    _print_header(" WHAT EXTERNAL SOURCES REPORT")

    # What reputable sources actually report
    if counter_facts:
        print(" What sources say:")
        for cf in counter_facts:
            print(f" → {cf}")
        print()

    # Article claims with NO supporting evidence in any source
    if extra_claims:
        print(" ⚠️ Claims in this article NOT found in any external source:")
        for claim in extra_claims:
            # Truncate long sentences cleanly at 120 chars
            snippet = claim.strip()
            if len(snippet) > 120:
                snippet = snippet[:117] + "..."
            print(f' ✗ "{snippet}"')
        print()


def _show_final_verdict(final_verdict, final_details, article_date, misinfo_type):
    """Print the closing verdict block."""
    print(_RULE)
    print(f" FINAL VERDICT: {final_verdict}")
    if article_date:
        print(f" Article Date: {article_date}")
    # Print details (may contain date flags appended by _combine_verdicts)
    for line in final_details.split("\n"):
        print(f" {line}")
    # Misinformation type
    if misinfo_type:
        print()
        print(" MISINFORMATION TYPE:")
        print(f" {misinfo_type}")
    print(_RULE)


def display_results(result):
    """Display the full fact-check results.

    Args:
        result: mapping produced by ``FactChecker.check``. The required keys
            ("internal", "external", "final_verdict", "final_details" and the
            nested "validation", "bias", "db_results", "web_results",
            "top_score") are all read before anything is printed, so a
            malformed result raises ``KeyError`` without partial output.
            Optional sections are read with ``.get``.
    """
    internal = result["internal"]
    external = result["external"]
    final_verdict = result["final_verdict"]
    final_details = result["final_details"]
    date_flags = result.get("date_flags", [])
    article_date = result.get("article_date")
    time_orient = result.get("time_orientation", {})
    validation = internal["validation"]
    bias = internal["bias"]
    db_results = external["db_results"]
    web_results = external["web_results"]
    top_score = external["top_score"]

    # 1. News Validation (with time orientation)
    _show_validation(validation, bias)
    _show_time_orientation(time_orient)

    # 2. Bias Analysis
    _show_bias(bias)

    # 3. Structure Analysis — Inverted Pyramid
    _show_structure(internal.get("structure", {}))

    # 4. Database Results
    _show_database_results(db_results, top_score)

    # 5. Web Results
    _show_web_results(web_results, final_verdict)

    # Source reliability note
    if db_results or web_results:
        _print_header(" SOURCE RELIABILITY ASSESSMENT")
        for line in explain_sources(db_results, web_results, top_score):
            print(line)
        print()

    # Pattern Deviation Check
    pattern_deviation = result.get("pattern_deviation")
    if pattern_deviation and pattern_deviation.get("verdict") != "NO EXTERNAL SOURCES":
        display_pattern_deviation(pattern_deviation)

    # 6. Date Flags (if any)
    if date_flags:
        _print_header(" DATE VALIDATION")
        if article_date:
            print(f" Article Date: {article_date}")
        for flag in date_flags:
            print(f" {flag}")
        print()

    # 7. LIME Explanation
    _show_lime(result.get("lime_explanation", []), validation)

    # 8. Claim Verification
    _show_claim_verification(result.get("claim_verification", []))

    # 9. Counter-Facts + Unsupported Claims
    extra_claims = (pattern_deviation or {}).get("extra_claims", [])
    _show_external_reports(result.get("counter_facts", []), extra_claims)

    # 10. Final Verdict
    _show_final_verdict(
        final_verdict,
        final_details,
        article_date,
        result.get("misinformation_type", ""),
    )


def main():
    """Create the checker, read one article from the user and print its report."""
    fc = FactChecker()
    text = get_user_input()
    if not text:
        print("No input provided. Exiting.")
        return
    print("\n[Running Logic] Analyzing claim...\n")
    result = fc.check(text)
    display_results(result)


if __name__ == "__main__":
    main()