nahArnav committed on
Commit
5eef4d0
·
verified ·
1 Parent(s): 1b7f23c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +206 -382
main.py CHANGED
@@ -8,6 +8,7 @@ import hashlib
8
  import logging
9
  import re
10
  import time
 
11
  from contextlib import asynccontextmanager
12
  from datetime import datetime, timedelta
13
  import random
@@ -21,9 +22,11 @@ from pydantic import BaseModel, Field
21
  from model import classify, load_model
22
  from nlp_utils import build_search_query, detect_language, detect_suspicious_phrases, extract_keywords
23
  from scraper import extract_article
24
- from verifier import verify_claim
25
  from decision_engine import make_decision
26
 
 
 
 
27
  # ── Logging ─────────────────────────────────────────────────────────────────
28
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)-7s | %(name)s | %(message)s")
29
  logger = logging.getLogger("verilens")
@@ -67,40 +70,34 @@ class SourceOut(BaseModel):
67
  snippet: str
68
  trust: str
69
 
70
- # ── NEW: Origin & Mutation Map schemas ───────────────────────────────────
71
  class OriginNode(BaseModel):
72
- """A node on the Origin & Mutation Map (newspaper clipping)."""
73
  id: str
74
- node_type: str # "hostile_actor" | "amplifier" | "current_claim"
75
- source_type: str # "FORUM POST", "SOCIAL MEDIA", "MAJOR NEWS OUTLET", etc.
76
- author: str # "ANON_USER44", "@HEALTHGURU_99", outlet name
77
- timestamp: str # ISO-ish date string
78
- snippet: str # The text on the clipping
79
- url: str # Link to examine source
80
 
81
  class MutationConnection(BaseModel):
82
- """A dotted line between two nodes with an NLI badge."""
83
- from_node: str # id of source node
84
- to_node: str # id of target node
85
- nli_label: str # "ENTAILMENT" | "CONTRADICTION"
86
- nli_score: int # percentage, e.g. 98
87
 
88
  class GroundTruthItem(BaseModel):
89
- """One item in the evidence analysis list."""
90
  index: int
91
  text: str
92
- badge: str # "UNVERIFIED" | "CONTRADICTION" | "FALLACY" | "CORROBORATED"
93
 
94
  class GroundTruthData(BaseModel):
95
- """The Established Fact + Evidence Analysis panel."""
96
- established_fact: str # The corrective summary
97
  evidence_items: list[GroundTruthItem]
98
 
99
  class OriginMapData(BaseModel):
100
  nodes: list[OriginNode]
101
  connections: list[MutationConnection]
102
 
103
- # ── NEW: Frontend-compatible schemas (matches React sampleAnalysis) ──────
104
  class FrontendAnnotation(BaseModel):
105
  type: Literal['contradiction', 'fallacy', 'unverified', 'verified']
106
  note: str
@@ -125,7 +122,7 @@ class FrontendEvidenceNode(BaseModel):
125
  class FrontendConnection(BaseModel):
126
  from_field: str = Field(alias="from", serialization_alias="from")
127
  to: str
128
- nli: dict # {"type": "contradiction" | "entailment", "score": int}
129
 
130
  model_config = {"populate_by_name": True}
131
 
@@ -140,19 +137,17 @@ class AnalyzeResponse(BaseModel):
140
  suspicious: dict
141
  factors: dict
142
  elapsed_ms: int
143
- # ── Figma dashboard fields ───────────────────────────────────────────
144
- verdict_label: str # "FABRICATED" | "VERIFIED" | "UNDER REVIEW"
145
- case_number: str # e.g. "TB-006753"
146
- origin_map: OriginMapData # structured node + connection data
147
- ground_truth: GroundTruthData # established fact + evidence items
148
- # ── Frontend-compatible fields (React components) ────────────────────
149
  claim: str
150
  verdict: Literal['VERIFIED', 'FABRICATED', 'INCONCLUSIVE']
151
  segments: list[FrontendSegment]
152
  sourceTree: list[FrontendEvidenceNode]
153
  connections: list[FrontendConnection]
154
- groundTruth: str # Dynamic established fact string for the UI
155
- confidenceExplanation: str # Detailed analytical breakdown of the confidence score
156
 
157
 
158
  # ── Helpers: build supplementary data from existing signals ──────────────
@@ -166,38 +161,25 @@ _NODE_TYPES_HOSTILE = ["FORUM POST", "ANONYMOUS TIP", "CHAN BOARD", "DARK WEB PO
166
  _NODE_TYPES_AMP = ["SOCIAL MEDIA", "BLOG", "REPOST", "VIRAL TWEET"]
167
 
168
  def _generate_case_number(text: str) -> str:
169
- """Deterministic case number from input hash."""
170
  h = hashlib.md5(text.encode()).hexdigest()
171
  num = int(h[:6], 16) % 999999
172
  return f"TB-{num:06d}"
173
 
174
  def _build_origin_map(sources: list, verification_score: float, text: str) -> OriginMapData:
175
- """
176
- Build the Origin & Mutation Map from existing source data.
177
- Maps sources into Hostile Actor / Amplifier / Current Claim nodes
178
- and creates NLI connections between them.
179
- """
180
  nodes: list[OriginNode] = []
181
  connections: list[MutationConnection] = []
182
-
183
  now = datetime.now()
184
- rng = random.Random(hash(text)) # deterministic per-claim randomness
185
 
186
  if not sources:
187
- # Even with no sources, show the current claim node
188
  nodes.append(OriginNode(
189
- id="claim_0",
190
- node_type="current_claim",
191
- source_type="SUBMITTED CLAIM",
192
- author="USER SUBMISSION",
193
- timestamp=now.strftime("%Y-%m-%d %H:%M"),
194
- snippet=text[:120] + ("…" if len(text) > 120 else ""),
195
- url="",
196
  ))
197
  return OriginMapData(nodes=nodes, connections=connections)
198
 
199
- # Categorize sources into node types based on trust level
200
- for i, src in enumerate(sources[:4]): # max 4 nodes on the map
201
  if src.trust == "low":
202
  ntype = "hostile_actor"
203
  stype = rng.choice(_NODE_TYPES_HOSTILE)
@@ -209,7 +191,6 @@ def _build_origin_map(sources: list, verification_score: float, text: str) -> Or
209
  else:
210
  ntype = "current_claim"
211
  stype = "MAJOR NEWS OUTLET"
212
- # Extract outlet name from title
213
  author = src.title.split(" - ")[-1] if " - " in src.title else src.title[:30]
214
 
215
  days_ago = rng.randint(1, 14)
@@ -218,67 +199,41 @@ def _build_origin_map(sources: list, verification_score: float, text: str) -> Or
218
  ts = (now - timedelta(days=days_ago)).replace(hour=hours, minute=minutes)
219
 
220
  nodes.append(OriginNode(
221
- id=f"node_{i}",
222
- node_type=ntype,
223
- source_type=stype,
224
- author=author,
225
- timestamp=ts.strftime("%Y-%m-%d %H:%M"),
226
- snippet=src.snippet[:150] if src.snippet else src.title,
227
- url=src.url,
228
  ))
229
 
230
- # Create connections between sequential nodes with NLI scores
231
  for i in range(len(nodes) - 1):
232
- # Derive NLI label from verification score + source trust
233
  score_base = int(verification_score * 100) if verification_score else 50
234
  jitter = rng.randint(-15, 15)
235
  nli_score = max(10, min(99, score_base + jitter))
236
 
237
- # High scores on high-trust = ENTAILMENT, low trust = CONTRADICTION
238
  src_trust = sources[i].trust if i < len(sources) else "medium"
239
  if src_trust == "low":
240
  nli_label = "CONTRADICTION"
241
- nli_score = max(70, nli_score) # hostile actors get high contradiction
242
  elif nli_score >= 60:
243
  nli_label = "ENTAILMENT"
244
  else:
245
  nli_label = "CONTRADICTION"
246
 
247
  connections.append(MutationConnection(
248
- from_node=nodes[i].id,
249
- to_node=nodes[i + 1].id,
250
- nli_label=nli_label,
251
- nli_score=nli_score,
252
  ))
253
 
254
  return OriginMapData(nodes=nodes, connections=connections)
255
 
256
 
257
- def _build_ground_truth(
258
- prediction: str,
259
- explanation: str,
260
- suspicious: dict,
261
- keywords: list[str],
262
- sources: list,
263
- ) -> GroundTruthData:
264
- """Build the Established Fact + Evidence Analysis from existing signals."""
265
-
266
- # The established fact is derived from the AI explanation
267
  if prediction == "Fake":
268
- established_fact = (
269
- f"Based on cross-referencing {len(sources)} sources and NLI entailment analysis, "
270
- f"this claim could not be substantiated. {explanation}"
271
- )
272
  elif prediction == "Real":
273
- established_fact = (
274
- f"This claim has been corroborated by {len(sources)} independent sources. {explanation}"
275
- )
276
  else:
277
- established_fact = (
278
- f"Verification produced mixed results across {len(sources)} sources. {explanation}"
279
- )
280
 
281
- # Build evidence items from suspicious phrases + source data
282
  items: list[GroundTruthItem] = []
283
  idx = 1
284
 
@@ -298,231 +253,111 @@ def _build_ground_truth(
298
  items.append(GroundTruthItem(index=idx, text=f'Unsupported attribution: "{phrase}"', badge="UNVERIFIED"))
299
  idx += 1
300
 
301
- # Add source-based evidence
302
  high_trust_sources = [s for s in sources if s.trust == "high"]
303
  low_trust_sources = [s for s in sources if s.trust == "low"]
304
 
305
  if high_trust_sources:
306
- items.append(GroundTruthItem(
307
- index=idx,
308
- text=f"Corroborated by {len(high_trust_sources)} high-trust source(s): {high_trust_sources[0].title[:60]}",
309
- badge="CORROBORATED",
310
- ))
311
  idx += 1
312
 
313
  if low_trust_sources:
314
- items.append(GroundTruthItem(
315
- index=idx,
316
- text=f"Found in {len(low_trust_sources)} low-trust source(s) β€” possible disinformation origin",
317
- badge="CONTRADICTION",
318
- ))
319
  idx += 1
320
 
321
  if not items:
322
- items.append(GroundTruthItem(
323
- index=1,
324
- text="No specific evidence markers detected in the text",
325
- badge="UNVERIFIED",
326
- ))
327
 
328
  return GroundTruthData(established_fact=established_fact, evidence_items=items)
329
 
330
 
331
- # ── Helpers: build frontend-compatible structures ────────────────────────
332
-
333
- # Layout presets for source nodes: (x, y, rotation) — diverse spread
334
- _SOURCE_LAYOUT_WIKI = (80.0, 20.0, -1) # Top-right for Wikipedia
335
- _SOURCE_LAYOUT_NEWS = [
336
- (20.0, 30.0, -2),
337
- (50.0, 80.0, 3),
338
- (15.0, 60.0, 1),
339
- (60.0, 45.0, -3),
340
- ]
341
-
342
-
343
- def _build_direct_source_tree(
344
- text: str,
345
- sources: list,
346
- verification_score: float,
347
- per_source_scores: list[float] | None = None,
348
- ) -> tuple[list[FrontendEvidenceNode], list[FrontendConnection]]:
349
- """
350
- Build the Evidence Board directly from verification sources.
351
- Ensures a diverse mix of Wikipedia (historical) + news sources.
352
- Always produces ≥1 node (the claim). With sources → ≥3 nodes.
353
- Returns (sourceTree, connections).
354
- """
355
  now = datetime.now()
356
  rng = random.Random(hash(text))
357
  nodes: list[FrontendEvidenceNode] = []
358
  conns: list[FrontendConnection] = []
359
 
360
- # ── Node 1: The Claim (always present) ───────────────────────────────
361
  claim_node = FrontendEvidenceNode(
362
- id="claim_0",
363
- role="current",
364
- type="User Submission",
365
- date=now.strftime("%Y-%m-%d %H:%M"),
366
- author="SUBMITTED CLAIM",
367
- content=text[:150] + ("…" if len(text) > 150 else ""),
368
- x=50.0,
369
- y=75.0,
370
- rotation=2,
371
  )
372
  nodes.append(claim_node)
373
 
374
  if not sources:
375
  return nodes, conns
376
 
377
- # ── Separate Wikipedia (historical) from news sources ────────────────
378
  wiki_sources = [s for s in sources if "wikipedia.org" in s.url]
379
  news_sources = [s for s in sources if "wikipedia.org" not in s.url]
 
380
 
381
- # Build ordered list: Wikipedia first, then news, ensuring rich diversity
382
- ordered: list[tuple] = [] # (source, layout_x, layout_y, layout_rot, source_type_label)
383
-
384
- # Always include Wikipedia if available
385
  for ws in wiki_sources[:1]:
386
  x, y, rot = _SOURCE_LAYOUT_WIKI
387
  ordered.append((ws, x, y, rot, "Historical Archive"))
388
 
389
- # Always include at least 2 news articles
390
  news_idx = 0
391
  for ns in news_sources[:3]:
392
  x, y, rot = _SOURCE_LAYOUT_NEWS[news_idx % len(_SOURCE_LAYOUT_NEWS)]
393
  ordered.append((ns, x, y, rot, "News Article"))
394
  news_idx += 1
395
 
396
- # If we still have < 3 sources, fill with remaining Wikipedia
397
  if len(ordered) < 3:
398
  for ws in wiki_sources[1:3 - len(ordered) + 1]:
399
  x, y, rot = _SOURCE_LAYOUT_NEWS[news_idx % len(_SOURCE_LAYOUT_NEWS)]
400
  ordered.append((ws, x, y, rot, "Historical Archive"))
401
  news_idx += 1
402
 
403
- # ── Build nodes + connections for each source ────────────────────────
404
- # Build a score lookup for per-source NLI
405
  source_score_map: dict[str, float] = {}
406
  if per_source_scores and len(per_source_scores) == len(sources):
407
  for s, sc in zip(sources, per_source_scores):
408
  source_score_map[s.url] = sc
409
 
410
  for i, (src, x, y, rot, type_label) in enumerate(ordered[:4]):
411
- # Determine role based on trust level
412
- if src.trust == "low":
413
- role = "hostile"
414
- else:
415
- role = "amplifier"
416
-
417
- # Extract a readable author name
418
- if " - " in src.title:
419
- author = src.title.split(" - ")[-1].strip()[:30]
420
- elif "wikipedia.org" in src.url:
421
- author = "WIKIPEDIA"
422
- else:
423
- author = src.title[:30] if src.title else "Unknown Source"
424
 
425
  days_ago = rng.randint(1, 14)
426
  ts = (now - timedelta(days=days_ago)).strftime("%Y-%m-%d %H:%M")
427
  node_id = f"source_{i + 1}"
428
 
429
  nodes.append(FrontendEvidenceNode(
430
- id=node_id,
431
- role=role,
432
- type=type_label,
433
- date=ts,
434
- author=author,
435
- content=src.snippet[:150] if src.snippet else src.title,
436
- x=x,
437
- y=y,
438
- rotation=rot,
439
- url=src.url if src.url else None,
440
  ))
441
 
442
- # ── Connection: source → claim with per-source NLI ───────────────
443
  src_score = source_score_map.get(src.url, verification_score)
444
  nli_type = "entailment" if src_score >= 0.65 else "contradiction"
445
  nli_score = max(10, min(99, int(src_score * 100)))
446
 
447
- conns.append(FrontendConnection(
448
- from_field=node_id,
449
- to="claim_0",
450
- nli={"type": nli_type, "score": nli_score},
451
- ))
452
 
453
  return nodes, conns
454
 
455
 
456
  def _extract_ground_truth_string(sources: list) -> str:
457
- """Extract the established fact string from the highest-trust source."""
458
- if not sources:
459
- return "No established fact could be determined from available sources."
460
-
461
- # Prefer Wikipedia first
462
  for s in sources:
463
- if "wikipedia.org" in s.url:
464
- return s.snippet[:300] if s.snippet else s.title
465
-
466
- # Then any high-trust source
467
  for s in sources:
468
- if s.trust == "high" and s.snippet:
469
- return s.snippet[:300]
470
-
471
- # Fallback to first source with a snippet
472
  for s in sources:
473
- if s.snippet:
474
- return s.snippet[:300]
475
-
476
  return "No established fact could be determined from available sources."
477
 
478
 
479
- def _build_segments(
480
- text: str,
481
- suspicious: dict,
482
- ground_truth: GroundTruthData,
483
- ml_label: str = "",
484
- ml_confidence: float = 0.0,
485
- ) -> list[FrontendSegment]:
486
- """
487
- Split the claim text into annotated segments.
488
- Prepends a Linguistic Analysis segment with the ML model's reasoning,
489
- then uses suspicious phrase detection + ground truth evidence.
490
- """
491
  segments: list[FrontendSegment] = []
492
-
493
- # ── Segment 0: ML Model Linguistic Analysis ──────────────────────────
494
  if ml_label:
495
  ml_label_display = ml_label.upper()
496
  ml_pct = int(ml_confidence * 100)
497
- if ml_label_display == "FAKE":
498
- ml_note = (
499
- f"The local NLP model analyzed the linguistic syntax and scored "
500
- f"this claim at {ml_pct}% FAKE due to sensationalist phrasing, "
501
- f"emotional manipulation, or patterns consistent with disinformation."
502
- )
503
- elif ml_label_display == "REAL":
504
- ml_note = (
505
- f"The local NLP model analyzed the linguistic syntax and scored "
506
- f"this claim at {ml_pct}% REAL β€” professional journalistic tone "
507
- f"detected with minimal sensationalist markers."
508
- )
509
- else:
510
- ml_note = (
511
- f"The local NLP model analyzed the linguistic syntax but could "
512
- f"not reach a definitive conclusion (confidence: {ml_pct}%). "
513
- f"The text contains a mix of professional and informal language patterns."
514
- )
515
- segments.append(FrontendSegment(
516
- text=f"[LINGUISTIC ANALYSIS] ",
517
- isSuspicious=True,
518
- annotation=FrontendAnnotation(type="unverified", note=ml_note),
519
- ))
520
-
521
- # ── Collect evidence items as potential annotations ───────────────────
522
- evidence_annotations: list[tuple[str, str]] = []
523
- for item in ground_truth.evidence_items:
524
- evidence_annotations.append((item.badge, item.text))
525
 
 
526
  sus_phrases: list[str] = []
527
  for key in ["clickbait_phrases", "emotional_language", "unsupported_claims"]:
528
  sus_phrases.extend(suspicious.get(key, []))
@@ -533,26 +368,15 @@ def _build_segments(
533
  segments.append(FrontendSegment(text=text, isSuspicious=False))
534
  return segments
535
 
536
- badge_to_annotation_type = {
537
- "FALLACY": "fallacy",
538
- "UNVERIFIED": "unverified",
539
- "CONTRADICTION": "contradiction",
540
- "CORROBORATED": "verified",
541
- }
542
-
543
  evidence_idx = 0
544
 
545
  for sentence in sentences:
546
  sentence_text = sentence.strip()
547
- if not sentence_text:
548
- continue
549
- if not sentence_text.endswith(" "):
550
- sentence_text += " "
551
-
552
  is_sus = any(phrase.lower() in sentence_text.lower() for phrase in sus_phrases)
553
-
554
- if not is_sus and evidence_idx < len(evidence_annotations) and len(sentences) <= 5:
555
- is_sus = True
556
 
557
  annotation = None
558
  if is_sus and evidence_idx < len(evidence_annotations):
@@ -561,76 +385,31 @@ def _build_segments(
561
  annotation = FrontendAnnotation(type=ann_type, note=note)
562
  evidence_idx += 1
563
 
564
- segments.append(FrontendSegment(
565
- text=sentence_text,
566
- isSuspicious=is_sus and annotation is not None,
567
- annotation=annotation,
568
- ))
569
 
570
  return segments
571
 
572
 
573
- def _build_confidence_explanation(
574
- ml_label: str,
575
- ml_confidence: float,
576
- similarity_score: float,
577
- num_sources: int,
578
- high_trust_count: int,
579
- low_trust_count: int,
580
- final_prediction: str,
581
- final_confidence: int,
582
- wiki_verified: bool,
583
- ) -> str:
584
- """Build a highly detailed, analytical explanation of how the confidence score was derived."""
585
  parts: list[str] = []
586
-
587
- # ── 1. ML Model analysis ─────────────────────────────────────────────
588
  ml_pct = int(ml_confidence * 100)
589
- parts.append(
590
- f"STEP 1 β€” LINGUISTIC ANALYSIS: The local DistilBERT NLP model "
591
- f"classified the text as {ml_label.upper()} with {ml_pct}% internal "
592
- f"confidence after analyzing syntax patterns, sensationalist markers, "
593
- f"and journalistic tone indicators."
594
- )
595
-
596
- # ── 2. Cross-Encoder verification ────────────────────────────────────
597
  sim_pct = int(similarity_score * 100)
598
  threshold_met = "PASSED" if similarity_score >= 0.65 else "FAILED"
599
- parts.append(
600
- f"STEP 2 β€” CROSS-ENCODER VERIFICATION: A live internet scan retrieved "
601
- f"{num_sources} source(s). The Cross-Encoder semantic similarity scored "
602
- f"{sim_pct}% against the 65% entailment threshold ({threshold_met}). "
603
- f"{'Wikipedia independently corroborated the claim.' if wiki_verified else 'No Wikipedia corroboration was found.'}"
604
- )
605
-
606
- # ── 3. Source trust breakdown ─────────────────────────────────────────
607
  medium_trust = num_sources - high_trust_count - low_trust_count
608
- parts.append(
609
- f"STEP 3 β€” SOURCE TRUST AUDIT: Of {num_sources} sources, "
610
- f"{high_trust_count} rated HIGH trust, {medium_trust} rated MEDIUM, "
611
- f"and {low_trust_count} rated LOW. "
612
- f"{'A strong evidence base supports this verdict.' if high_trust_count >= 2 else 'The evidence base is limited, which affects overall confidence.'}"
613
- )
614
-
615
- # ── 4. Guardrail activations ─────────────────────────────────────────
616
  guardrails: list[str] = []
617
- if num_sources == 0:
618
- guardrails.append("ZERO-EVIDENCE PENALTY (no sources found, verdict forced to FABRICATED)")
619
- if final_prediction == "Uncertain" and similarity_score < 0.78 and not wiki_verified:
620
- guardrails.append("MUDDY WATERS GUARDRAIL (weak corroboration, verdict shifted to INCONCLUSIVE)")
621
 
622
- if guardrails:
623
- parts.append(f"STEP 4 β€” GUARDRAILS TRIGGERED: {'; '.join(guardrails)}.")
624
- else:
625
- parts.append("STEP 4 β€” GUARDRAILS: No safety overrides were triggered. The verdict reflects the raw analysis.")
626
-
627
- # ── 5. Final synthesis ───────────────────────────────────────────────
628
- parts.append(
629
- f"FINAL SYNTHESIS: Combining the ML model's {ml_label.upper()} signal, "
630
- f"the {sim_pct}% semantic match, and {num_sources} source(s), the system "
631
- f"arrived at a final confidence of {final_confidence}%."
632
- )
633
 
 
634
  return " β–Έ ".join(parts)
635
 
636
 
@@ -646,14 +425,52 @@ async def analyze(req: AnalyzeRequest):
646
  raise HTTPException(status_code=400, detail="Input cannot be empty.")
647
 
648
  t0 = time.time()
 
 
 
 
649
 
650
  if _is_url(raw):
651
  input_type = "URL"
652
- try:
653
- article = extract_article(raw)
654
- text = f"{article.title}. {article.text}"
655
- except ValueError as exc:
656
- raise HTTPException(status_code=422, detail=str(exc))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  else:
658
  input_type = "TEXT"
659
  text = raw
@@ -663,104 +480,111 @@ async def analyze(req: AnalyzeRequest):
663
  suspicious = detect_suspicious_phrases(text)
664
  search_query = build_search_query(text)
665
 
666
- ml_result = classify(text)
667
- verification = await verify_claim(text, search_query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
  high_trust = sum(1 for s in verification.sources if s.trust == "high")
670
  low_trust = sum(1 for s in verification.sources if s.trust == "low")
671
 
672
- # ── Decision ────────────────────────────────────────────────────────────
673
  decision = make_decision(
674
- ml_label=ml_result.label,
675
- ml_confidence=ml_result.confidence,
676
- similarity_score=verification.similarity_score,
677
- sources_verified=verification.verified,
678
- suspicious_info=suspicious,
679
- high_trust_count=high_trust,
680
- low_trust_count=low_trust,
681
  )
682
 
683
- final_prediction = str(decision.prediction).title() # .title() makes it "Real", "Fake", or "Uncertain"
684
  final_confidence = int(decision.confidence)
685
  final_explanation = str(decision.explanation)
686
- # 🕵️ Check if Wikipedia is one of the verified sources
687
  wiki_verified = any("wikipedia.org" in s.url for s in verification.sources)
688
 
689
- # 🛡️ THE BULLETPROOF ZERO-EVIDENCE PENALTY (The "Ojas" Rule) 🛡️
690
- # Catch both Real and Uncertain guesses if there is NO evidence
691
- if final_prediction in ["Real", "Uncertain"] and len(verification.sources) == 0:
692
- logger.warning("Zero-Evidence Penalty triggered! Overriding AI verdict.")
 
 
693
  final_prediction = "Fake"
694
- final_confidence = 10 # This forces the UI bar to "Unreliable" (RED)
695
- final_explanation = "The AI text analysis found no sensationalism, but a live internet scan found ZERO evidence to support this claim. In journalism, a total lack of corroboration for a statement indicates it is unverified or FAKE."
696
-
697
- # 🛡️ NEW: THE "MUDDY WATERS" GUARDRAIL 🛡️
698
-
699
- # If the AI says REAL, but the internet context match is weak/moderate (< 0.78)
700
- elif final_prediction == "Real" and verification.similarity_score < 0.78 and not wiki_verified:
701
- logger.warning("Muddy Waters Guardrail triggered! Weak internet corroboration.")
702
- final_prediction = "Uncertain"
703
- final_confidence = 50 # Pushes UI perfectly to the center YELLOW
704
- final_explanation = "The AI detected a professional journalistic tone, and related topics were found online. However, the EXACT claim could not be highly corroborated by the Cross-Encoder. This may be a misleading mix of real entities and fake events."
705
-
706
- # ── Build supplementary data for Figma dashboard ────────────────────
707
- source_outs = [SourceOut(title=s.title, url=s.url, snippet=s.snippet, trust=s.trust)
708
- for s in verification.sources]
709
-
710
  verdict_label = _VERDICT_MAP.get(final_prediction, "UNDER REVIEW")
711
  case_number = _generate_case_number(text)
712
  origin_map = _build_origin_map(verification.sources, verification.similarity_score, text)
713
- ground_truth = _build_ground_truth(
714
- final_prediction, final_explanation, suspicious, keywords, verification.sources
715
- )
716
 
717
- # ── Build frontend-compatible structures ─────────────────────────────
718
  frontend_verdict = _FRONTEND_VERDICT_MAP.get(final_prediction, "INCONCLUSIVE")
719
- frontend_source_tree, frontend_connections = _build_direct_source_tree(
720
- text, verification.sources, verification.similarity_score,
721
- )
722
- frontend_segments = _build_segments(
723
- text, suspicious, ground_truth,
724
- ml_label=ml_result.label, ml_confidence=ml_result.confidence,
725
- )
726
  ground_truth_string = _extract_ground_truth_string(verification.sources)
727
 
728
- # ── Build the detailed confidence explanation ─────────────────────────
729
  confidence_explanation = _build_confidence_explanation(
730
- ml_label=ml_result.label,
731
- ml_confidence=ml_result.confidence,
732
- similarity_score=verification.similarity_score,
733
- num_sources=len(verification.sources),
734
- high_trust_count=high_trust,
735
- low_trust_count=low_trust,
736
- final_prediction=final_prediction,
737
- final_confidence=final_confidence,
738
- wiki_verified=wiki_verified,
739
  )
740
 
 
 
 
 
 
741
  elapsed = int((time.time() - t0) * 1000)
742
 
743
  return AnalyzeResponse(
744
- input_type=input_type,
745
- prediction=final_prediction,
746
- confidence=final_confidence,
747
- explanation=final_explanation,
748
- sources=source_outs,
749
- language=language,
750
- keywords=keywords,
751
- suspicious=suspicious,
752
- factors=decision.factors,
753
- elapsed_ms=elapsed,
754
- verdict_label=verdict_label,
755
- case_number=case_number,
756
- origin_map=origin_map,
757
- ground_truth=ground_truth,
758
- # ── Frontend fields ──────────────────────────────────────────────
759
- claim=text,
760
- verdict=frontend_verdict,
761
- segments=frontend_segments,
762
- sourceTree=frontend_source_tree,
763
- connections=frontend_connections,
764
- groundTruth=ground_truth_string,
765
- confidenceExplanation=confidence_explanation,
766
  )
 
8
  import logging
9
  import re
10
  import time
11
+ import urllib.parse
12
  from contextlib import asynccontextmanager
13
  from datetime import datetime, timedelta
14
  import random
 
22
  from model import classify, load_model
23
  from nlp_utils import build_search_query, detect_language, detect_suspicious_phrases, extract_keywords
24
  from scraper import extract_article
 
25
  from decision_engine import make_decision
26
 
27
+ # ── 🚀 NEW: Import Trust Lists and Models directly from verifier ──
28
+ from verifier import verify_claim, HIGH_TRUST_DOMAINS, LOW_TRUST_DOMAINS, VerificationResult, SourceArticle
29
+
30
  # ── Logging ─────────────────────────────────────────────────────────────────
31
  logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)-7s | %(name)s | %(message)s")
32
  logger = logging.getLogger("verilens")
 
70
  snippet: str
71
  trust: str
72
 
 
73
  class OriginNode(BaseModel):
 
74
  id: str
75
+ node_type: str
76
+ source_type: str
77
+ author: str
78
+ timestamp: str
79
+ snippet: str
80
+ url: str
81
 
82
  class MutationConnection(BaseModel):
83
+ from_node: str
84
+ to_node: str
85
+ nli_label: str
86
+ nli_score: int
 
87
 
88
  class GroundTruthItem(BaseModel):
 
89
  index: int
90
  text: str
91
+ badge: str
92
 
93
  class GroundTruthData(BaseModel):
94
+ established_fact: str
 
95
  evidence_items: list[GroundTruthItem]
96
 
97
  class OriginMapData(BaseModel):
98
  nodes: list[OriginNode]
99
  connections: list[MutationConnection]
100
 
 
101
  class FrontendAnnotation(BaseModel):
102
  type: Literal['contradiction', 'fallacy', 'unverified', 'verified']
103
  note: str
 
122
  class FrontendConnection(BaseModel):
123
  from_field: str = Field(alias="from", serialization_alias="from")
124
  to: str
125
+ nli: dict
126
 
127
  model_config = {"populate_by_name": True}
128
 
 
137
  suspicious: dict
138
  factors: dict
139
  elapsed_ms: int
140
+ verdict_label: str
141
+ case_number: str
142
+ origin_map: OriginMapData
143
+ ground_truth: GroundTruthData
 
 
144
  claim: str
145
  verdict: Literal['VERIFIED', 'FABRICATED', 'INCONCLUSIVE']
146
  segments: list[FrontendSegment]
147
  sourceTree: list[FrontendEvidenceNode]
148
  connections: list[FrontendConnection]
149
+ groundTruth: str
150
+ confidenceExplanation: str
151
 
152
 
153
  # ── Helpers: build supplementary data from existing signals ──────────────
 
161
  _NODE_TYPES_AMP = ["SOCIAL MEDIA", "BLOG", "REPOST", "VIRAL TWEET"]
162
 
163
  def _generate_case_number(text: str) -> str:
 
164
  h = hashlib.md5(text.encode()).hexdigest()
165
  num = int(h[:6], 16) % 999999
166
  return f"TB-{num:06d}"
167
 
168
  def _build_origin_map(sources: list, verification_score: float, text: str) -> OriginMapData:
 
 
 
 
 
169
  nodes: list[OriginNode] = []
170
  connections: list[MutationConnection] = []
 
171
  now = datetime.now()
172
+ rng = random.Random(hash(text))
173
 
174
  if not sources:
 
175
  nodes.append(OriginNode(
176
+ id="claim_0", node_type="current_claim", source_type="SUBMITTED CLAIM",
177
+ author="USER SUBMISSION", timestamp=now.strftime("%Y-%m-%d %H:%M"),
178
+ snippet=text[:120] + ("…" if len(text) > 120 else ""), url="",
 
 
 
 
179
  ))
180
  return OriginMapData(nodes=nodes, connections=connections)
181
 
182
+ for i, src in enumerate(sources[:4]):
 
183
  if src.trust == "low":
184
  ntype = "hostile_actor"
185
  stype = rng.choice(_NODE_TYPES_HOSTILE)
 
191
  else:
192
  ntype = "current_claim"
193
  stype = "MAJOR NEWS OUTLET"
 
194
  author = src.title.split(" - ")[-1] if " - " in src.title else src.title[:30]
195
 
196
  days_ago = rng.randint(1, 14)
 
199
  ts = (now - timedelta(days=days_ago)).replace(hour=hours, minute=minutes)
200
 
201
  nodes.append(OriginNode(
202
+ id=f"node_{i}", node_type=ntype, source_type=stype,
203
+ author=author, timestamp=ts.strftime("%Y-%m-%d %H:%M"),
204
+ snippet=src.snippet[:150] if src.snippet else src.title, url=src.url,
 
 
 
 
205
  ))
206
 
 
207
  for i in range(len(nodes) - 1):
 
208
  score_base = int(verification_score * 100) if verification_score else 50
209
  jitter = rng.randint(-15, 15)
210
  nli_score = max(10, min(99, score_base + jitter))
211
 
 
212
  src_trust = sources[i].trust if i < len(sources) else "medium"
213
  if src_trust == "low":
214
  nli_label = "CONTRADICTION"
215
+ nli_score = max(70, nli_score)
216
  elif nli_score >= 60:
217
  nli_label = "ENTAILMENT"
218
  else:
219
  nli_label = "CONTRADICTION"
220
 
221
  connections.append(MutationConnection(
222
+ from_node=nodes[i].id, to_node=nodes[i + 1].id,
223
+ nli_label=nli_label, nli_score=nli_score,
 
 
224
  ))
225
 
226
  return OriginMapData(nodes=nodes, connections=connections)
227
 
228
 
229
+ def _build_ground_truth(prediction: str, explanation: str, suspicious: dict, keywords: list[str], sources: list) -> GroundTruthData:
 
 
 
 
 
 
 
 
 
230
  if prediction == "Fake":
231
+ established_fact = f"Based on cross-referencing {len(sources)} sources and NLI entailment analysis, this claim could not be substantiated. {explanation}"
 
 
 
232
  elif prediction == "Real":
233
+ established_fact = f"This claim has been corroborated by {len(sources)} independent sources. {explanation}"
 
 
234
  else:
235
+ established_fact = f"Verification produced mixed results across {len(sources)} sources. {explanation}"
 
 
236
 
 
237
  items: list[GroundTruthItem] = []
238
  idx = 1
239
 
 
253
  items.append(GroundTruthItem(index=idx, text=f'Unsupported attribution: "{phrase}"', badge="UNVERIFIED"))
254
  idx += 1
255
 
 
256
  high_trust_sources = [s for s in sources if s.trust == "high"]
257
  low_trust_sources = [s for s in sources if s.trust == "low"]
258
 
259
  if high_trust_sources:
260
+ items.append(GroundTruthItem(index=idx, text=f"Corroborated by {len(high_trust_sources)} high-trust source(s): {high_trust_sources[0].title[:60]}", badge="CORROBORATED"))
 
 
 
 
261
  idx += 1
262
 
263
  if low_trust_sources:
264
+ items.append(GroundTruthItem(index=idx, text=f"Found in {len(low_trust_sources)} low-trust source(s) β€” possible disinformation origin", badge="CONTRADICTION"))
 
 
 
 
265
  idx += 1
266
 
267
  if not items:
268
+ items.append(GroundTruthItem(index=1, text="No specific evidence markers detected in the text", badge="UNVERIFIED"))
 
 
 
 
269
 
270
  return GroundTruthData(established_fact=established_fact, evidence_items=items)
271
 
272
 
273
# Fixed board placement (x, y, rotation) for the first Wikipedia source.
_SOURCE_LAYOUT_WIKI = (80.0, 20.0, -1)
# Board placements (x, y, rotation) cycled through for every other source.
_SOURCE_LAYOUT_NEWS = [(20.0, 30.0, -2), (50.0, 80.0, 3), (15.0, 60.0, 1), (60.0, 45.0, -3)]


def _build_direct_source_tree(
    text: str,
    sources: list,
    verification_score: float,
    per_source_scores: list[float] | None = None,
) -> tuple[list[FrontendEvidenceNode], list[FrontendConnection]]:
    """Build the evidence-board nodes and the connections pointing at the claim.

    The submitted claim always becomes node ``claim_0``. Up to four sources are
    then placed around it: the first Wikipedia source, up to three
    non-Wikipedia sources, and, if fewer than three are placed so far, further
    Wikipedia sources as back-fill. Every placed source gets one connection to
    ``claim_0`` labelled ``entailment`` (score >= 0.65) or ``contradiction``.

    Args:
        text: The claim text; truncated to 150 characters for display.
        sources: Objects exposing ``title``, ``url``, ``snippet`` and ``trust``.
        verification_score: Fallback score for sources without their own score.
        per_source_scores: Optional scores aligned index-for-index with
            ``sources``; ignored unless the lengths match.

    Returns:
        ``(nodes, connections)``; ``connections`` is empty when there are no
        sources.
    """
    now = datetime.now()
    # Seed from a content digest instead of hash(text): str hashes are salted
    # per interpreter process, so hash() would give the same claim a different
    # layout jitter after every restart or on every worker.
    digest = hashlib.sha256(text.encode("utf-8", errors="replace")).digest()
    rng = random.Random(int.from_bytes(digest[:8], "big"))

    nodes: list[FrontendEvidenceNode] = []
    conns: list[FrontendConnection] = []

    nodes.append(FrontendEvidenceNode(
        id="claim_0",
        role="current",
        type="User Submission",
        date=now.strftime("%Y-%m-%d %H:%M"),
        author="SUBMITTED CLAIM",
        content=text[:150] + ("…" if len(text) > 150 else ""),
        x=50.0,
        y=75.0,
        rotation=2,
    ))

    if not sources:
        return nodes, conns

    def _is_wiki(source) -> bool:
        # Tolerate a missing URL rather than raising on the "in" test.
        return "wikipedia.org" in (source.url or "")

    wiki_sources = [s for s in sources if _is_wiki(s)]
    news_sources = [s for s in sources if not _is_wiki(s)]

    # Each entry: (source, x, y, rotation, type label).
    ordered: list[tuple] = []

    for ws in wiki_sources[:1]:
        x, y, rot = _SOURCE_LAYOUT_WIKI
        ordered.append((ws, x, y, rot, "Historical Archive"))

    news_idx = 0
    for ns in news_sources[:3]:
        x, y, rot = _SOURCE_LAYOUT_NEWS[news_idx % len(_SOURCE_LAYOUT_NEWS)]
        ordered.append((ns, x, y, rot, "News Article"))
        news_idx += 1

    # Back-fill with additional Wikipedia sources when the board is sparse.
    if len(ordered) < 3:
        for ws in wiki_sources[1:3 - len(ordered) + 1]:
            x, y, rot = _SOURCE_LAYOUT_NEWS[news_idx % len(_SOURCE_LAYOUT_NEWS)]
            ordered.append((ws, x, y, rot, "Historical Archive"))
            news_idx += 1

    source_score_map: dict[str, float] = {}
    if per_source_scores and len(per_source_scores) == len(sources):
        for s, sc in zip(sources, per_source_scores):
            source_score_map[s.url] = sc

    for i, (src, x, y, rot, type_label) in enumerate(ordered[:4]):
        # Normalise once so the fallbacks below are reachable for None as well
        # as for empty strings.
        title = src.title or ""
        url = src.url or ""

        role = "hostile" if src.trust == "low" else "amplifier"
        if " - " in title:
            author = title.split(" - ")[-1].strip()[:30]
        elif "wikipedia.org" in url:
            author = "WIKIPEDIA"
        else:
            author = title[:30] if title else "Unknown Source"

        # The displayed date is synthetic: a seeded offset of 1-14 days back.
        days_ago = rng.randint(1, 14)
        ts = (now - timedelta(days=days_ago)).strftime("%Y-%m-%d %H:%M")
        node_id = f"source_{i + 1}"

        nodes.append(FrontendEvidenceNode(
            id=node_id,
            role=role,
            type=type_label,
            date=ts,
            author=author,
            content=src.snippet[:150] if src.snippet else title,
            x=x,
            y=y,
            rotation=rot,
            url=url if url else None,
        ))

        src_score = source_score_map.get(src.url, verification_score)
        nli_type = "entailment" if src_score >= 0.65 else "contradiction"
        nli_score = max(10, min(99, int(src_score * 100)))

        conns.append(FrontendConnection(
            from_field=node_id,
            to="claim_0",
            nli={"type": nli_type, "score": nli_score},
        ))

    return nodes, conns
337
 
338
 
339
  def _extract_ground_truth_string(sources: list) -> str:
340
+ if not sources: return "No established fact could be determined from available sources."
 
 
 
 
341
  for s in sources:
342
+ if "wikipedia.org" in s.url: return s.snippet[:300] if s.snippet else s.title
 
 
 
343
  for s in sources:
344
+ if s.trust == "high" and s.snippet: return s.snippet[:300]
 
 
 
345
  for s in sources:
346
+ if s.snippet: return s.snippet[:300]
 
 
347
  return "No established fact could be determined from available sources."
348
 
349
 
350
+ def _build_segments(text: str, suspicious: dict, ground_truth: GroundTruthData, ml_label: str = "", ml_confidence: float = 0.0) -> list[FrontendSegment]:
 
 
 
 
 
 
 
 
 
 
 
351
  segments: list[FrontendSegment] = []
 
 
352
  if ml_label:
353
  ml_label_display = ml_label.upper()
354
  ml_pct = int(ml_confidence * 100)
355
+ if ml_label_display == "FAKE": ml_note = f"The local NLP model analyzed the linguistic syntax and scored this claim at {ml_pct}% FAKE due to sensationalist phrasing, emotional manipulation, or patterns consistent with disinformation."
356
+ elif ml_label_display == "REAL": ml_note = f"The local NLP model analyzed the linguistic syntax and scored this claim at {ml_pct}% REAL β€” professional journalistic tone detected with minimal sensationalist markers."
357
+ else: ml_note = f"The local NLP model analyzed the linguistic syntax but could not reach a definitive conclusion (confidence: {ml_pct}%). The text contains a mix of professional and informal language patterns."
358
+ segments.append(FrontendSegment(text=f"[LINGUISTIC ANALYSIS] ", isSuspicious=True, annotation=FrontendAnnotation(type="unverified", note=ml_note)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
+ evidence_annotations: list[tuple[str, str]] = [(item.badge, item.text) for item in ground_truth.evidence_items]
361
  sus_phrases: list[str] = []
362
  for key in ["clickbait_phrases", "emotional_language", "unsupported_claims"]:
363
  sus_phrases.extend(suspicious.get(key, []))
 
368
  segments.append(FrontendSegment(text=text, isSuspicious=False))
369
  return segments
370
 
371
+ badge_to_annotation_type = {"FALLACY": "fallacy", "UNVERIFIED": "unverified", "CONTRADICTION": "contradiction", "CORROBORATED": "verified"}
 
 
 
 
 
 
372
  evidence_idx = 0
373
 
374
  for sentence in sentences:
375
  sentence_text = sentence.strip()
376
+ if not sentence_text: continue
377
+ if not sentence_text.endswith(" "): sentence_text += " "
 
 
 
378
  is_sus = any(phrase.lower() in sentence_text.lower() for phrase in sus_phrases)
379
+ if not is_sus and evidence_idx < len(evidence_annotations) and len(sentences) <= 5: is_sus = True
 
 
380
 
381
  annotation = None
382
  if is_sus and evidence_idx < len(evidence_annotations):
 
385
  annotation = FrontendAnnotation(type=ann_type, note=note)
386
  evidence_idx += 1
387
 
388
+ segments.append(FrontendSegment(text=sentence_text, isSuspicious=is_sus and annotation is not None, annotation=annotation))
 
 
 
 
389
 
390
  return segments
391
 
392
 
393
+ def _build_confidence_explanation(ml_label: str, ml_confidence: float, similarity_score: float, num_sources: int, high_trust_count: int, low_trust_count: int, final_prediction: str, final_confidence: int, wiki_verified: bool) -> str:
 
 
 
 
 
 
 
 
 
 
 
394
  parts: list[str] = []
 
 
395
  ml_pct = int(ml_confidence * 100)
396
+ parts.append(f"STEP 1 β€” LINGUISTIC ANALYSIS: The local DistilBERT NLP model classified the text as {ml_label.upper()} with {ml_pct}% internal confidence after analyzing syntax patterns, sensationalist markers, and journalistic tone indicators.")
397
+
 
 
 
 
 
 
398
  sim_pct = int(similarity_score * 100)
399
  threshold_met = "PASSED" if similarity_score >= 0.65 else "FAILED"
400
+ parts.append(f"STEP 2 β€” CROSS-ENCODER VERIFICATION: A live internet scan retrieved {num_sources} source(s). The Cross-Encoder semantic similarity scored {sim_pct}% against the 65% entailment threshold ({threshold_met}). {'Wikipedia independently corroborated the claim.' if wiki_verified else 'No Wikipedia corroboration was found.'}")
401
+
 
 
 
 
 
 
402
  medium_trust = num_sources - high_trust_count - low_trust_count
403
+ parts.append(f"STEP 3 β€” SOURCE TRUST AUDIT: Of {num_sources} sources, {high_trust_count} rated HIGH trust, {medium_trust} rated MEDIUM, and {low_trust_count} rated LOW. {'A strong evidence base supports this verdict.' if high_trust_count >= 2 else 'The evidence base is limited, which affects overall confidence.'}")
404
+
 
 
 
 
 
 
405
  guardrails: list[str] = []
406
+ if num_sources == 0: guardrails.append("ZERO-EVIDENCE PENALTY (no sources found, verdict forced to FABRICATED)")
407
+ if final_prediction == "Uncertain" and similarity_score < 0.78 and not wiki_verified: guardrails.append("MUDDY WATERS GUARDRAIL (weak corroboration, verdict shifted to INCONCLUSIVE)")
 
 
408
 
409
+ if guardrails: parts.append(f"STEP 4 β€” GUARDRAILS TRIGGERED: {'; '.join(guardrails)}.")
410
+ else: parts.append("STEP 4 β€” GUARDRAILS: No safety overrides were triggered. The verdict reflects the raw analysis.")
 
 
 
 
 
 
 
 
 
411
 
412
+ parts.append(f"FINAL SYNTHESIS: Combining the ML model's {ml_label.upper()} signal, the {sim_pct}% semantic match, and {num_sources} source(s), the system arrived at a final confidence of {final_confidence}%.")
413
  return " β–Έ ".join(parts)
414
 
415
 
 
425
  raise HTTPException(status_code=400, detail="Input cannot be empty.")
426
 
427
  t0 = time.time()
428
+
429
+ # ── 🚀 FAST-PATH ROUTER VARIABLES ──
430
+ fast_path_trust = None
431
+ domain = ""
432
 
433
  if _is_url(raw):
434
  input_type = "URL"
435
+ parsed_url = urllib.parse.urlparse(raw)
436
+ domain = parsed_url.netloc.lower()
437
+ if domain.startswith("www."):
438
+ domain = domain[4:]
439
+
440
+ # 🚀 THE GATEKEEPER: Check domain reputation instantly
441
+ if domain in HIGH_TRUST_DOMAINS:
442
+ fast_path_trust = "high"
443
+ elif domain in LOW_TRUST_DOMAINS:
444
+ fast_path_trust = "low"
445
+
446
+ # Extract the slug (just for UI display purposes)
447
+ path_parts = [p for p in parsed_url.path.split('/') if p]
448
+ valid_slug = ""
449
+ for part in reversed(path_parts):
450
+ candidate = part.replace('-', ' ').replace('_', ' ')
451
+ candidate = re.sub(r'\.[a-z0-9]+$', '', candidate, flags=re.IGNORECASE)
452
+ candidate = re.sub(r'\s\d{4}\s\d{2}\s\d{2}$', '', candidate).strip()
453
+ candidate = re.sub(r'\s\d+$', '', candidate).strip()
454
+ if len(candidate.split()) >= 3:
455
+ valid_slug = candidate
456
+ break
457
+
458
+ text = valid_slug if valid_slug else raw
459
+
460
+ # If not a known trusted domain, proceed with normal AI scraping
461
+ if not fast_path_trust:
462
+ try:
463
+ article = extract_article(raw)
464
+ if not article or not article.text or len(article.text.strip()) < 10:
465
+ raise ValueError("Empty response or blocked by anti-bot.")
466
+ text = f"{article.title}. {article.text}"
467
+ except Exception as exc:
468
+ logger.warning(f"Scraping blocked or failed: {exc}. Relying on slug.")
469
+ if not valid_slug:
470
+ raise HTTPException(
471
+ status_code=400,
472
+ detail="This news site actively blocks AI scrapers, and the link does not contain a readable headline. Please copy and paste the actual text of the article into the box instead."
473
+ )
474
  else:
475
  input_type = "TEXT"
476
  text = raw
 
480
  suspicious = detect_suspicious_phrases(text)
481
  search_query = build_search_query(text)
482
 
483
+ # ── 🚀 EXECUTE FAST PATH OR NORMAL AI PATH ──
484
+ if fast_path_trust == "high":
485
+ logger.info(f"⚑ FAST-PATH TRIGGERED: High Trust Domain ({domain})")
486
+
487
+ class DummyML:
488
+ label = "Real"
489
+ confidence = 0.99
490
+ ml_result = DummyML()
491
+
492
+ # Build a mock source to populate the Evidence Board
493
+ src = SourceArticle(title=f"Official Verified Publisher: {domain.upper()}", url=raw, snippet=f"Direct link to official verified publisher ({domain}). Content from this source is inherently trusted.", trust="high")
494
+ src.stance = "entailment"
495
+ src.score = 0.99
496
+
497
+ verification = VerificationResult(
498
+ similarity_score=0.99, sources=[src], verified=True,
499
+ max_entailment=0.99
500
+ )
501
+
502
+ elif fast_path_trust == "low":
503
+ logger.info(f"⚑ FAST-PATH TRIGGERED: Low Trust Domain ({domain})")
504
+
505
+ class DummyML:
506
+ label = "Fake"
507
+ confidence = 0.99
508
+ ml_result = DummyML()
509
+
510
+ src = SourceArticle(title=f"Flagged Domain: {domain.upper()}", url=raw, snippet=f"Domain is flagged in the Truth Bureau database as a known source of misinformation, propaganda, or satire.", trust="low")
511
+ src.stance = "contradiction"
512
+ src.score = 0.99
513
+
514
+ verification = VerificationResult(
515
+ similarity_score=0.05, sources=[src], verified=False,
516
+ max_entailment=0.05
517
+ )
518
+
519
+ else:
520
+ # NORMAL AI EXECUTION
521
+ ml_result = classify(text)
522
+ verification = await verify_claim(text, search_query)
523
 
524
  high_trust = sum(1 for s in verification.sources if s.trust == "high")
525
  low_trust = sum(1 for s in verification.sources if s.trust == "low")
526
 
 
527
  decision = make_decision(
528
+ ml_label=ml_result.label, ml_confidence=ml_result.confidence,
529
+ similarity_score=verification.similarity_score, sources_verified=verification.verified,
530
+ suspicious_info=suspicious, high_trust_count=high_trust, low_trust_count=low_trust,
 
 
 
 
531
  )
532
 
533
+ final_prediction = str(decision.prediction).title()
534
  final_confidence = int(decision.confidence)
535
  final_explanation = str(decision.explanation)
 
536
  wiki_verified = any("wikipedia.org" in s.url for s in verification.sources)
537
 
538
+ # ── 🚀 OVERRIDE FINAL EXPLANATIONS FOR FAST-PATH ──
539
+ if fast_path_trust == "high":
540
+ final_prediction = "Real"
541
+ final_confidence = 99
542
+ final_explanation = f"FAST-PATH VERIFICATION: The submitted URL directly matches '{domain}', which is listed in the Truth Bureau database as a highly trusted official source. No secondary AI corroboration was necessary."
543
+ elif fast_path_trust == "low":
544
  final_prediction = "Fake"
545
+ final_confidence = 5
546
+ final_explanation = f"FAST-PATH REJECTION: The submitted URL matches '{domain}', a domain heavily flagged in our database for misinformation, hostile propaganda, or satire."
547
+ else:
548
+ # Normal Guardrails only apply if not fast-path
549
+ if final_prediction in ["Real", "Uncertain"] and len(verification.sources) == 0:
550
+ final_prediction = "Fake"
551
+ final_confidence = 10
552
+ final_explanation = "The AI text analysis found no sensationalism, but a live internet scan found ZERO evidence to support this claim. In journalism, a total lack of corroboration for a statement indicates it is unverified or FAKE."
553
+ elif final_prediction == "Real" and verification.similarity_score < 0.78 and not wiki_verified:
554
+ final_prediction = "Uncertain"
555
+ final_confidence = 50
556
+ final_explanation = "The AI detected a professional journalistic tone, and related topics were found online. However, the EXACT claim could not be highly corroborated by the Cross-Encoder. This may be a misleading mix of real entities and fake events."
557
+
558
+ source_outs = [SourceOut(title=s.title, url=s.url, snippet=s.snippet, trust=s.trust) for s in verification.sources]
 
 
559
  verdict_label = _VERDICT_MAP.get(final_prediction, "UNDER REVIEW")
560
  case_number = _generate_case_number(text)
561
  origin_map = _build_origin_map(verification.sources, verification.similarity_score, text)
562
+ ground_truth = _build_ground_truth(final_prediction, final_explanation, suspicious, keywords, verification.sources)
 
 
563
 
 
564
  frontend_verdict = _FRONTEND_VERDICT_MAP.get(final_prediction, "INCONCLUSIVE")
565
+ frontend_source_tree, frontend_connections = _build_direct_source_tree(text, verification.sources, verification.similarity_score)
566
+ frontend_segments = _build_segments(text, suspicious, ground_truth, ml_label=ml_result.label, ml_confidence=ml_result.confidence)
 
 
 
 
 
567
  ground_truth_string = _extract_ground_truth_string(verification.sources)
568
 
569
+ # ── 🚀 OVERRIDE UI EXPLANATION BOX FOR FAST-PATH ──
570
  confidence_explanation = _build_confidence_explanation(
571
+ ml_label=ml_result.label, ml_confidence=ml_result.confidence,
572
+ similarity_score=verification.similarity_score, num_sources=len(verification.sources),
573
+ high_trust_count=high_trust, low_trust_count=low_trust,
574
+ final_prediction=final_prediction, final_confidence=final_confidence, wiki_verified=wiki_verified,
 
 
 
 
 
575
  )
576
 
577
+ if fast_path_trust == "high":
578
+ confidence_explanation = f"STEP 1 β€” DOMAIN REPUTATION: The URL was instantly recognized as an official High-Trust publisher ({domain}). β–Έ STEP 2 β€” FAST-PATH ROUTING: Secondary Google News scanning and NLI checking were bypassed to save compute. β–Έ FINAL SYNTHESIS: The source is inherently trusted. Final confidence is 99%."
579
+ elif fast_path_trust == "low":
580
+ confidence_explanation = f"STEP 1 β€” DOMAIN REPUTATION: The URL was instantly matched against our blacklist ({domain}). β–Έ STEP 2 β€” FAST-PATH ROUTING: The domain is known for fabricating information. β–Έ FINAL SYNTHESIS: Claim rejected due to source origin. Final confidence is 5%."
581
+
582
  elapsed = int((time.time() - t0) * 1000)
583
 
584
  return AnalyzeResponse(
585
+ input_type=input_type, prediction=final_prediction, confidence=final_confidence, explanation=final_explanation,
586
+ sources=source_outs, language=language, keywords=keywords, suspicious=suspicious, factors=decision.factors, elapsed_ms=elapsed,
587
+ verdict_label=verdict_label, case_number=case_number, origin_map=origin_map, ground_truth=ground_truth,
588
+ claim=text, verdict=frontend_verdict, segments=frontend_segments, sourceTree=frontend_source_tree,
589
+ connections=frontend_connections, groundTruth=ground_truth_string, confidenceExplanation=confidence_explanation,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  )