Ryan Christian D. Deniega Claude Sonnet 4.6 committed on
Commit
8af997f
Β·
1 Parent(s): 2f3f71f

Add ML model comparison panel showing BoW, TF-IDF, Naive Bayes, and LDA results

Browse files

All four classical classifiers now run on every verification request (concurrently
with Layer 2 evidence fetch) and their verdicts, confidence scores, and top
triggered features appear in a side-by-side panel in the result UI between
the Score Breakdown and Layer 1/Layer 2 cards. LDA topic labels (lda_topic_N)
are highlighted in teal to distinguish them from TF-IDF feature chips.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

api/schemas.py CHANGED
@@ -94,6 +94,15 @@ class Layer2Result(BaseModel):
94
  claim_method: Optional[str] = Field(None, description="How the claim was extracted: sentence_scoring | sentence_heuristic | passthrough")
95
 
96
 
 
 
 
 
 
 
 
 
 
97
  # ── Main Response ─────────────────────────────────────────────────────────────
98
 
99
  class VerificationResponse(BaseModel):
@@ -111,6 +120,10 @@ class VerificationResponse(BaseModel):
111
  processing_time_ms: Optional[float] = None
112
  extracted_text: Optional[str] = Field(None, description="Raw text extracted from the URL / image / video for transparency")
113
  ocr_text: Optional[str] = Field(None, description="Text extracted from an image via OCR (when image_url was provided alongside text)")
 
 
 
 
114
 
115
 
116
  # ── History / Trends ──────────────────────────────────────────────────────────
 
94
  claim_method: Optional[str] = Field(None, description="How the claim was extracted: sentence_scoring | sentence_heuristic | passthrough")
95
 
96
 
97
# ── Classifier Comparison ─────────────────────────────────────────────────────

class ClassifierComparisonEntry(BaseModel):
    """One classical ML classifier's result for the side-by-side comparison panel."""

    # Display name of the classifier: "BoW", "TF-IDF", "Naive Bayes", or "LDA".
    name: str
    # The classifier's own verdict, independent of the blended pipeline verdict.
    verdict: Verdict
    # Confidence as a percentage in [0, 100].
    confidence: float = Field(..., ge=0.0, le=100.0)
    # Up to 3 top triggered features; LDA entries use "lda_topic_N" labels.
    # default_factory avoids a shared mutable default and matches the style of
    # VerificationResponse.classifier_comparison added in the same change.
    top_features: list[str] = Field(default_factory=list)
104
+
105
+
106
  # ── Main Response ─────────────────────────────────────────────────────────────
107
 
108
  class VerificationResponse(BaseModel):
 
120
  processing_time_ms: Optional[float] = None
121
  extracted_text: Optional[str] = Field(None, description="Raw text extracted from the URL / image / video for transparency")
122
  ocr_text: Optional[str] = Field(None, description="Text extracted from an image via OCR (when image_url was provided alongside text)")
123
+ classifier_comparison: list[ClassifierComparisonEntry] = Field(
124
+ default_factory=list,
125
+ description="Per-classifier results from all classical ML models (BoW, TF-IDF, NB, LDA)",
126
+ )
127
 
128
 
129
  # ── History / Trends ──────────────────────────────────────────────────────────
frontend/src/pages/VerifyPage.jsx CHANGED
@@ -885,6 +885,69 @@ export default function VerifyPage() {
885
  </p>
886
  </div>
887
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
888
  {/* Row 3: Layer cards (2 col, collapses to 1 on mobile) */}
889
  <div className="grid grid-cols-1 sm:grid-cols-2 gap-4 fade-up-4">
890
  {/* Layer 1 */}
 
885
  </p>
886
  </div>
887
 
888
+ {/* Model Comparison Panel */}
889
+ {result.classifier_comparison?.length > 0 && (
890
+ <div className="card p-5 fade-up-3">
891
+ <SectionHeading>Model Comparison β€” Classical ML</SectionHeading>
892
+ <p className="text-xs mb-4" style={{ color: 'var(--text-muted)', fontFamily: 'var(--font-body)', lineHeight: 1.6 }}>
893
+ All four classical classifiers trained on the same dataset β€” same input, different algorithms.
894
+ </p>
895
+ <div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(160px, 1fr))', gap: 10 }}>
896
+ {result.classifier_comparison.map((clf) => (
897
+ <div key={clf.name} style={{
898
+ background: 'rgba(255,255,255,0.03)',
899
+ border: '1px solid rgba(255,255,255,0.08)',
900
+ borderRadius: 6, padding: '10px 12px',
901
+ }}>
902
+ <div style={{
903
+ fontSize: '0.65rem', fontFamily: 'var(--font-mono)',
904
+ letterSpacing: '0.08em', color: 'var(--text-muted)',
905
+ textTransform: 'uppercase', marginBottom: 6,
906
+ }}>
907
+ {clf.name}
908
+ </div>
909
+ <div style={{
910
+ display: 'inline-block', fontSize: '0.7rem', fontWeight: 600,
911
+ padding: '2px 8px', borderRadius: 3, marginBottom: 6,
912
+ background: clf.verdict === 'Credible' ? 'rgba(34,197,94,0.15)' :
913
+ clf.verdict === 'Likely Fake' ? 'rgba(239,68,68,0.15)' :
914
+ 'rgba(234,179,8,0.15)',
915
+ color: clf.verdict === 'Credible' ? 'var(--accent-green)' :
916
+ clf.verdict === 'Likely Fake' ? '#f87171' : 'var(--accent-gold)',
917
+ }}>
918
+ {clf.verdict}
919
+ </div>
920
+ <div style={{ fontSize: '0.68rem', color: 'var(--text-muted)', marginBottom: 4 }}>
921
+ {clf.confidence.toFixed(1)}% confidence
922
+ </div>
923
+ <div style={{ height: 3, background: 'rgba(255,255,255,0.08)', borderRadius: 2, marginBottom: 8 }}>
924
+ <div style={{
925
+ height: '100%', borderRadius: 2, width: `${clf.confidence}%`,
926
+ background: clf.verdict === 'Credible' ? 'var(--accent-green)' :
927
+ clf.verdict === 'Likely Fake' ? '#f87171' : 'var(--accent-gold)',
928
+ }} />
929
+ </div>
930
+ {clf.top_features?.length > 0 && (
931
+ <div style={{ display: 'flex', flexWrap: 'wrap', gap: 3 }}>
932
+ {clf.top_features.map((f, i) => (
933
+ <span key={i} style={{
934
+ fontSize: '0.6rem', padding: '1px 5px', borderRadius: 2,
935
+ background: f.startsWith('lda_topic') ? 'rgba(6,182,212,0.12)' : 'rgba(220,38,38,0.1)',
936
+ color: f.startsWith('lda_topic') ? 'var(--accent-cyan)' : '#f87171',
937
+ border: `1px solid ${f.startsWith('lda_topic') ? 'rgba(6,182,212,0.3)' : 'rgba(220,38,38,0.25)'}`,
938
+ fontFamily: 'var(--font-mono)',
939
+ }}>
940
+ {f}
941
+ </span>
942
+ ))}
943
+ </div>
944
+ )}
945
+ </div>
946
+ ))}
947
+ </div>
948
+ </div>
949
+ )}
950
+
951
  {/* Row 3: Layer cards (2 col, collapses to 1 on mobile) */}
952
  <div className="grid grid-cols-1 sm:grid-cols-2 gap-4 fade-up-4">
953
  {/* Layer 1 */}
scoring/engine.py CHANGED
@@ -14,6 +14,7 @@ from config import get_settings
14
  from api.schemas import (
15
  VerificationResponse, Verdict, Language, DomainTier,
16
  Layer1Result, Layer2Result, EntitiesResult, EvidenceSource, Stance,
 
17
  )
18
 
19
  logger = logging.getLogger(__name__)
@@ -31,6 +32,39 @@ def _get_nlp(key: str, factory):
31
  _nlp_cache[key] = factory()
32
  return _nlp_cache[key]
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # ── Domain credibility lookup ─────────────────────────────────────────────────
35
  _DOMAIN_DB_PATH = Path(__file__).parent.parent / "domain_credibility.json"
36
  _DOMAIN_DB: dict = {}
@@ -173,6 +207,9 @@ async def run_verification(
173
  evidence_sources: list[EvidenceSource] = []
174
  l2_verdict = Verdict.UNVERIFIED
175
 
 
 
 
176
  if settings.news_api_key:
177
  try:
178
  query_entities = ner_result.persons + ner_result.organizations + ner_result.locations
@@ -278,6 +315,8 @@ async def run_verification(
278
  verdict = _map_verdict(final_score)
279
 
280
  # ── Step 10: Assemble response ────────────────────────────────────────────
 
 
281
  result = VerificationResponse(
282
  verdict=verdict,
283
  confidence=round(max(l1.confidence, evidence_score / 100 * 100), 1),
@@ -295,6 +334,7 @@ async def run_verification(
295
  language=language,
296
  domain_credibility=get_domain_tier(source_domain) if source_domain else None,
297
  input_type=input_type,
 
298
  )
299
 
300
  # ── Record to Firestore (falls back to in-memory if Firebase not configured) ─
 
14
  from api.schemas import (
15
  VerificationResponse, Verdict, Language, DomainTier,
16
  Layer1Result, Layer2Result, EntitiesResult, EvidenceSource, Stance,
17
+ ClassifierComparisonEntry,
18
  )
19
 
20
  logger = logging.getLogger(__name__)
 
32
  _nlp_cache[key] = factory()
33
  return _nlp_cache[key]
34
 
35
# ── Classical classifier comparison ──────────────────────────────────────────
# Runs all four classical ML classifiers on every request for the demo panel.
# Each classifier trains once on first call and is cached via _get_nlp().

def _lazy_classifier(module_name: str, class_name: str):
    """Return a zero-arg factory that imports *module_name* and instantiates *class_name*.

    Deferring the import keeps the heavy ML modules off the module-import path;
    the instance is only created when _get_nlp() first needs it.
    """
    def factory():
        import importlib
        return getattr(importlib.import_module(module_name), class_name)()
    return factory


# (display name, cache key, lazy factory) for each classical model in the panel.
# Built once at import time instead of on every request.
_COMPARISON_CLASSIFIERS = [
    ("BoW", "cmp_bow", _lazy_classifier("ml.bow_classifier", "BoWClassifier")),
    ("TF-IDF", "cmp_tfidf", _lazy_classifier("ml.tfidf_classifier", "TFIDFClassifier")),
    ("Naive Bayes", "cmp_nb", _lazy_classifier("ml.naive_bayes_classifier", "NaiveBayesClassifier")),
    ("LDA", "cmp_lda", _lazy_classifier("ml.lda_analysis", "LDAFeatureClassifier")),
]


async def _run_comparison(text: str) -> list[ClassifierComparisonEntry]:
    """Run BoW, TF-IDF, Naive Bayes, and LDA classifiers and return comparison entries.

    Predictions run in a thread-pool worker so the (potentially CPU-bound,
    first-call-training) classifiers don't block the event loop. A classifier
    that raises is logged and skipped rather than failing the whole request,
    so the returned list may have fewer than four entries.
    """
    def _predict_all() -> list[ClassifierComparisonEntry]:
        results: list[ClassifierComparisonEntry] = []
        for name, key, factory in _COMPARISON_CLASSIFIERS:
            try:
                clf = _get_nlp(key, factory)
                r = clf.predict(text)
                results.append(ClassifierComparisonEntry(
                    name=name,
                    verdict=Verdict(r.verdict),
                    confidence=r.confidence,
                    top_features=r.triggered_features[:3],
                ))
            except Exception as exc:
                logger.warning("Comparison classifier %s failed: %s", name, exc)
        return results

    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() is deprecated for this use since Python 3.10.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _predict_all)
+
67
+
68
  # ── Domain credibility lookup ─────────────────────────────────────────────────
69
  _DOMAIN_DB_PATH = Path(__file__).parent.parent / "domain_credibility.json"
70
  _DOMAIN_DB: dict = {}
 
207
  evidence_sources: list[EvidenceSource] = []
208
  l2_verdict = Verdict.UNVERIFIED
209
 
210
+ # Run classifier comparison concurrently with evidence fetch
211
+ comparison_task = asyncio.create_task(_run_comparison(proc.cleaned))
212
+
213
  if settings.news_api_key:
214
  try:
215
  query_entities = ner_result.persons + ner_result.organizations + ner_result.locations
 
315
  verdict = _map_verdict(final_score)
316
 
317
  # ── Step 10: Assemble response ────────────────────────────────────────────
318
+ comparison = await comparison_task
319
+
320
  result = VerificationResponse(
321
  verdict=verdict,
322
  confidence=round(max(l1.confidence, evidence_score / 100 * 100), 1),
 
334
  language=language,
335
  domain_credibility=get_domain_tier(source_domain) if source_domain else None,
336
  input_type=input_type,
337
+ classifier_comparison=comparison,
338
  )
339
 
340
  # ── Record to Firestore (falls back to in-memory if Firebase not configured) ─