Spaces:

SemiAutomat1c
/

philverify-api

Running

philverify-api / frontend /src /pages /BenchmarksPage.jsx

Ryan Christian D. Deniega

feat: extension button placement, text extraction, OCR display + ML improvements

c78c2c1 2 months ago

15.1 kB

	import { useState } from 'react'
	import {
	BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip,
	ResponsiveContainer, Cell, ReferenceLine,
	} from 'recharts'
	import { PAGE_STYLE } from '../App.jsx'

	// ── Eval results (from python -m ml.eval, seed=42, 79 train / 21 val) ─────────
	/**
	 * Benchmark rows for the accuracy chart and the results table.
	 *
	 * Rows are written grouped by tier and then flattened, in declaration order,
	 * into objects shaped `{ name, shortName, accuracy, tier, lecture, note }`:
	 *   - name      full model label
	 *   - shortName compact label (used as the chart's category key)
	 *   - accuracy  percentage value, rendered with one decimal
	 *   - tier      key into TIER_COLOR / TIER_LABEL
	 *   - lecture   course reference shown in the table
	 *   - note      one-line explanation shown in the tooltip and the table
	 */
	const MODELS = Object.entries({
	  classical: [
	    {
	      name: 'BoW + LogReg',
	      shortName: 'BoW+LR',
	      accuracy: 52.4,
	      lecture: 'Lecture 3',
	      note: 'CountVectorizer loses TF weighting — raw counts hurt precision on short headlines',
	    },
	    {
	      name: 'BoW + LogReg + Lemma',
	      shortName: 'BoW+LR+L',
	      accuracy: 52.4,
	      lecture: 'Lectures 2–3',
	      note: 'No change from non-lemmatized — WordNet is English-biased; Tagalog tokens unchanged',
	    },
	    {
	      name: 'TF-IDF + LogReg',
	      shortName: 'TFIDF+LR',
	      accuracy: 61.9,
	      lecture: 'Lecture 3',
	      note: 'Sublinear TF weighting reduces dominance of high-frequency terms; best classical model',
	    },
	    {
	      name: 'TF-IDF + NB',
	      shortName: 'TFIDF+NB',
	      accuracy: 42.9,
	      lecture: 'Lectures 5–6',
	      note: 'Feature independence assumption breaks on 79 samples; noisy probability estimates',
	    },
	    {
	      name: 'TF-IDF + NB + Lemma',
	      shortName: 'NB+Lemma',
	      accuracy: 42.9,
	      lecture: 'Lectures 2, 5–6',
	      note: 'Lemmatization again neutral — confirms English-biased lemmatizer finding',
	    },
	    {
	      name: 'LDA + LogReg',
	      shortName: 'LDA+LR',
	      accuracy: 42.9,
	      lecture: 'Lecture 7',
	      note: '5 topics over 79 documents is too few for stable topic distributions',
	    },
	  ],
	  transformer: [
	    {
	      name: 'XLM-RoBERTa',
	      shortName: 'XLM-R',
	      accuracy: 90.5,
	      lecture: 'Transfer Learning',
	      note: 'Pretrained on 100+ languages including Filipino; fine-tuned on combined dataset',
	    },
	    {
	      name: 'Tagalog-RoBERTa',
	      shortName: 'TL-R',
	      accuracy: 95.2,
	      lecture: 'Transfer Learning',
	      note: 'Pretrained on TLUnified Filipino corpus; higher recall on Tagalog/Taglish posts',
	    },
	  ],
	  ensemble: [
	    {
	      name: 'Ensemble',
	      shortName: 'Ensemble',
	      accuracy: 100.0,
	      lecture: 'Ensemble Methods',
	      note: 'Soft-vote average of XLM-R + Tagalog-RoBERTa logits; 100% on 21-sample holdout',
	    },
	  ],
	}).flatMap(([tier, rows]) =>
	  // Re-assemble each row explicitly so the key order stays
	  // name → shortName → accuracy → tier → lecture → note.
	  rows.map(({ name, shortName, accuracy, lecture, note }) => ({
	    name,
	    shortName,
	    accuracy,
	    tier,
	    lecture,
	    note,
	  }))
	)

	/**
	 * Accent color per model tier, keyed by the `tier` field of a MODELS row.
	 * In declaration order the colors are gold, cyan and green.
	 */
	const TIER_COLOR = {
	  classical: '#d97706',
	  transformer: '#06b6d4',
	  ensemble: '#16a34a',
	}

	/**
	 * Display name per model tier. Key order matters: the chart legend iterates
	 * this object with Object.entries, so entries render in declaration order.
	 */
	const TIER_LABEL = {
	  classical: 'Classical ML',
	  transformer: 'Transformer',
	  ensemble: 'Ensemble',
	}

	/**
	 * Cards for the "Key Findings" grid. Each entry has a `lecture` tag, a
	 * `title`, a `body` paragraph and the `color` applied to the lecture tag.
	 * Titles double as React keys, so they must stay unique.
	 */
	const FINDINGS = (() => {
	  // Same hex values as the classical and transformer entries of TIER_COLOR.
	  const gold = '#d97706'
	  const cyan = '#06b6d4'

	  return [
	    {
	      lecture: 'Lecture 3',
	      title: 'TF-IDF > Bag of Words',
	      body: 'TF-IDF sublinear weighting outperforms raw BoW counts by +9.5%. Down-weighting high-frequency filler terms matters for short Filipino news headlines.',
	      color: gold,
	    },
	    {
	      lecture: 'Lectures 5–6',
	      title: 'Naive Bayes struggles at small scale',
	      body: 'MultinomialNB reaches only 42.9% — 19pp below LogReg. Feature independence breaks down when training on 79 noisy, cross-lingual samples.',
	      color: gold,
	    },
	    {
	      lecture: 'Lecture 7',
	      title: 'LDA needs more documents',
	      body: '5 topics over 79 training texts yields unstable distributions. Topic features are weak signal for 3-class classification; LDA would improve with 1000+ samples.',
	      color: gold,
	    },
	    {
	      lecture: 'Lectures 2a–2c',
	      title: 'Lemmatization: neutral on Tagalog',
	      body: 'Zero accuracy change with WordNet lemmatization. English-biased lemmatizers return Tagalog tokens unchanged — confirms the tool is a no-op on Filipino text.',
	      color: cyan,
	    },
	  ]
	})()

	// ── Custom tooltip ─────────────────────────────────────────────────────────────
	function ChartTooltip({ active, payload }) {
	if (!active \|\| !payload?.length) return null
	const d = payload[0].payload
	return (
	<div style={{
	background: 'var(--bg-elevated)',
	border: '1px solid var(--border-light)',
	borderRadius: 4,
	padding: '10px 14px',
	fontFamily: 'var(--font-mono)',
	fontSize: 11,
	color: 'var(--text-primary)',
	maxWidth: 240,
	}}>
	<div style={{ fontWeight: 700, marginBottom: 4 }}>{d.name}</div>
	<div style={{ color: TIER_COLOR[d.tier], marginBottom: 6 }}>
	{d.accuracy.toFixed(1)}% accuracy
	</div>
	<div style={{ color: 'var(--text-muted)', fontSize: 10, lineHeight: 1.5 }}>{d.note}</div>
	</div>
	)
	}

	// ── Tier legend pill ───────────────────────────────────────────────────────────
	function TierPill({ tier }) {
	return (
	<span style={{
	display: 'inline-block',
	padding: '2px 8px',
	borderRadius: 2,
	fontSize: 9,
	fontFamily: 'var(--font-mono)',
	fontWeight: 700,
	letterSpacing: '0.06em',
	textTransform: 'uppercase',
	background: `${TIER_COLOR[tier]}18`,
	color: TIER_COLOR[tier],
	border: `1px solid ${TIER_COLOR[tier]}40`,
	}}>
	{TIER_LABEL[tier]}
	</span>
	)
	}

	export default function BenchmarksPage() {
	const [activeRow, setActiveRow] = useState(null)

	return (
	<main style={{ ...PAGE_STYLE, paddingTop: 48, paddingBottom: 80 }}>

	{/* ── Header ─────────────────────────────────────────────────────────── */}
	<div className="fade-up-1" style={{ marginBottom: 40 }}>
	<div style={{
	fontFamily: 'var(--font-mono)',
	fontSize: 10,
	letterSpacing: '0.14em',
	color: 'var(--accent-red)',
	textTransform: 'uppercase',
	marginBottom: 10,
	}}>
	ML Course — Model Comparison
	</div>
	<h1 style={{
	fontFamily: 'var(--font-display)',
	fontWeight: 800,
	fontSize: 32,
	letterSpacing: '-0.02em',
	color: 'var(--text-primary)',
	marginBottom: 12,
	}}>
	Model Benchmarks
	</h1>
	<p style={{
	fontFamily: 'var(--font-body)',
	fontSize: 14,
	color: 'var(--text-secondary)',
	lineHeight: 1.7,
	maxWidth: 560,
	}}>
	Comparison of 9 classifier variants on a 21-sample holdout from the
	handcrafted PhilVerify dataset (79 train / 21 val, seed 42). Classical
	models trained in-session; transformer checkpoints fine-tuned on the
	full combined dataset.
	</p>
	</div>

	{/* ── Key findings ───────────────────────────────────────────────────── */}
	<div className="fade-up-2" style={{ marginBottom: 48 }}>
	<h2 style={{
	fontFamily: 'var(--font-display)',
	fontWeight: 700,
	fontSize: 11,
	letterSpacing: '0.12em',
	textTransform: 'uppercase',
	color: 'var(--text-muted)',
	marginBottom: 16,
	}}>
	Key Findings
	</h2>
	<div style={{ display: 'grid', gridTemplateColumns: 'repeat(auto-fit, minmax(210px, 1fr))', gap: 12 }}>
	{FINDINGS.map((f) => (
	<div key={f.title} className="card" style={{ padding: '16px 18px' }}>
	<div style={{
	fontFamily: 'var(--font-mono)',
	fontSize: 9,
	letterSpacing: '0.1em',
	textTransform: 'uppercase',
	color: f.color,
	marginBottom: 6,
	}}>
	{f.lecture}
	</div>
	<div style={{
	fontFamily: 'var(--font-display)',
	fontWeight: 700,
	fontSize: 13,
	color: 'var(--text-primary)',
	marginBottom: 8,
	lineHeight: 1.3,
	}}>
	{f.title}
	</div>
	<p style={{
	fontFamily: 'var(--font-body)',
	fontSize: 11,
	color: 'var(--text-secondary)',
	lineHeight: 1.6,
	margin: 0,
	}}>
	{f.body}
	</p>
	</div>
	))}
	</div>
	</div>

	{/* ── Bar chart ──────────────────────────────────────────────────────── */}
	<div className="fade-up-3 card" style={{ padding: '24px 20px', marginBottom: 32 }}>
	<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginBottom: 20 }}>
	<h2 style={{
	fontFamily: 'var(--font-display)',
	fontWeight: 700,
	fontSize: 13,
	letterSpacing: '0.06em',
	color: 'var(--text-primary)',
	margin: 0,
	}}>
	Accuracy by Model
	</h2>
	<div style={{ display: 'flex', gap: 12 }}>
	{Object.entries(TIER_LABEL).map(([tier, label]) => (
	<div key={tier} style={{ display: 'flex', alignItems: 'center', gap: 5 }}>
	<span style={{ width: 8, height: 8, borderRadius: 2, background: TIER_COLOR[tier], display: 'inline-block' }} />
	<span style={{ fontFamily: 'var(--font-mono)', fontSize: 9, color: 'var(--text-muted)', letterSpacing: '0.06em' }}>
	{label.toUpperCase()}
	</span>
	</div>
	))}
	</div>
	</div>
	<ResponsiveContainer width="100%" height={280}>
	<BarChart
	data={MODELS}
	layout="vertical"
	margin={{ top: 0, right: 40, left: 8, bottom: 0 }}
	>
	<CartesianGrid horizontal={false} stroke="rgba(245,240,232,0.04)" />
	<XAxis
	type="number"
	domain={[0, 100]}
	tickFormatter={v => `${v}%`}
	tick={{ fontSize: 9, fontFamily: 'var(--font-mono)', fill: 'var(--text-muted)' }}
	tickLine={false}
	axisLine={false}
	/>
	<YAxis
	type="category"
	dataKey="shortName"
	width={72}
	tick={{ fontSize: 9, fontFamily: 'var(--font-mono)', fill: 'var(--text-secondary)' }}
	tickLine={false}
	axisLine={false}
	/>
	<Tooltip content={<ChartTooltip />} cursor={{ fill: 'rgba(245,240,232,0.03)' }} />
	<ReferenceLine x={61.9} stroke="rgba(217,119,6,0.3)" strokeDasharray="3 3" label={{ value: 'Classical ceiling', position: 'top', fontSize: 8, fontFamily: 'var(--font-mono)', fill: '#d97706' }} />
	<Bar dataKey="accuracy" radius={[0, 2, 2, 0]} maxBarSize={20}>
	{MODELS.map((m) => (
	<Cell key={m.name} fill={TIER_COLOR[m.tier]} fillOpacity={activeRow === m.name ? 1 : 0.75} />
	))}
	</Bar>
	</BarChart>
	</ResponsiveContainer>
	</div>

	{/* ── Full results table ─────────────────────────────────────────────── */}
	<div className="fade-up-4 card" style={{ overflow: 'hidden' }}>
	<div style={{ padding: '18px 20px 12px', borderBottom: '1px solid var(--border)' }}>
	<h2 style={{
	fontFamily: 'var(--font-display)',
	fontWeight: 700,
	fontSize: 13,
	letterSpacing: '0.06em',
	color: 'var(--text-primary)',
	margin: 0,
	}}>
	Full Results
	</h2>
	</div>
	<table style={{ width: '100%', borderCollapse: 'collapse' }}>
	<thead>
	<tr style={{ borderBottom: '1px solid var(--border)' }}>
	{['Model', 'Accuracy', 'Tier', 'Lecture', 'Note'].map(h => (
	<th key={h} style={{
	padding: '8px 16px',
	textAlign: h === 'Accuracy' ? 'right' : 'left',
	fontFamily: 'var(--font-mono)',
	fontSize: 9,
	fontWeight: 700,
	letterSpacing: '0.1em',
	textTransform: 'uppercase',
	color: 'var(--text-muted)',
	}}>
	{h}
	</th>
	))}
	</tr>
	</thead>
	<tbody>
	{MODELS.map((m, i) => (
	<tr
	key={m.name}
	onMouseEnter={() => setActiveRow(m.name)}
	onMouseLeave={() => setActiveRow(null)}
	style={{
	borderBottom: i < MODELS.length - 1 ? '1px solid var(--border)' : 'none',
	background: activeRow === m.name ? 'var(--bg-elevated)' : 'transparent',
	transition: 'background 0.1s',
	borderLeft: `3px solid ${activeRow === m.name ? TIER_COLOR[m.tier] : 'transparent'}`,
	}}
	>
	<td style={{ padding: '10px 16px', fontFamily: 'var(--font-display)', fontSize: 12, fontWeight: 600, color: 'var(--text-primary)' }}>
	{m.name}
	</td>
	<td style={{ padding: '10px 16px', textAlign: 'right', fontFamily: 'var(--font-mono)', fontSize: 13, fontWeight: 700, color: TIER_COLOR[m.tier] }}>
	{m.accuracy.toFixed(1)}%
	</td>
	<td style={{ padding: '10px 16px' }}>
	<TierPill tier={m.tier} />
	</td>
	<td style={{ padding: '10px 16px', fontFamily: 'var(--font-mono)', fontSize: 10, color: 'var(--text-muted)' }}>
	{m.lecture}
	</td>
	<td style={{ padding: '10px 16px', fontFamily: 'var(--font-body)', fontSize: 11, color: 'var(--text-secondary)', lineHeight: 1.5, maxWidth: 260 }}>
	{m.note}
	</td>
	</tr>
	))}
	</tbody>
	</table>
	</div>

	{/* ── Footer note ────────────────────────────────────────────────────── */}
	<p className="fade-up-5" style={{
	marginTop: 20,
	fontFamily: 'var(--font-mono)',
	fontSize: 10,
	color: 'var(--text-muted)',
	lineHeight: 1.6,
	}}>
	* Val set is 21 samples from a handcrafted 100-sample dataset — ensemble 100% reflects
	near-zero variance on a small holdout, not production accuracy. Transformer models were
	trained on the larger combined dataset; classical models trained on the 79-sample split.
	</p>

	</main>
	)
	}