// esfiles/frontend/src/components/AnomalyPanel.tsx
// Author: Besjon Cifliku
// Commit 9c3ade2 — feat: implement anomaly detection to filter suspicious word relations
import { useState, useEffect } from "react";
import { api } from "../api";
import type {
BackgroundStatus, AnomalySweepResponse, AnomalyRelationResponse, IncongruenceResponse,
} from "../types";
import { useApiCall } from "../hooks/useApiCall";
import ScoreBar from "./ScoreBar";
import StatusMessage from "./StatusMessage";
/**
 * Inclusive bounds for the Stage A sweep parameters. The same numbers feed the
 * `min`/`max` attributes of the inputs and the clamping done before a request,
 * so the UI hint and the values actually sent cannot drift apart.
 */
const SWEEP_LIMITS = {
  minCount: { min: 1, max: 1000 },
  neighbours: { min: 5, max: 100 },
  topN: { min: 1, max: 200 },
} as const;

/**
 * Coerces a value read from a numeric input to an integer inside `bounds`.
 *
 * The `min`/`max` attributes on the inputs do not stop a user from typing an
 * out-of-range value, and a cleared field is stored as 0 by the `+value`
 * coercion in the change handlers, so the state can hold values outside the
 * declared range.
 *
 * @param value  Number currently held in component state.
 * @param bounds Inclusive lower/upper limits.
 * @returns `bounds.min` for non-finite input, otherwise the truncated value
 *          clamped to `[bounds.min, bounds.max]`.
 */
function clampInt(value: number, bounds: { min: number; max: number }): number {
  if (!Number.isFinite(value)) return bounds.min;
  return Math.min(bounds.max, Math.max(bounds.min, Math.trunc(value)));
}

/**
 * Panel for the anomaly-detection workflow. It renders, top to bottom:
 *
 * - the background-model status badge, with a button to load the model when it
 *   is not ready;
 * - Stage A: a corpus sweep (`api.analyzeAnomalies`) listing flagged words;
 * - Stage B: the relations of the word clicked in the sweep table
 *   (`api.analyzeAnomalyRelations`);
 * - Stage C: a keyword "zoom" (`api.analyzeIncongruence`) listing occurrences
 *   with their incongruence scores.
 *
 * Each stage keeps its own request state through `useApiCall`.
 */
export default function AnomalyPanel() {
  const [bg, setBg] = useState<BackgroundStatus | null>(null);
  const [bgLoading, setBgLoading] = useState(false);
  const [bgError, setBgError] = useState("");

  // Stage A — corpus sweep
  const [showAdvanced, setShowAdvanced] = useState(false);
  const [minCount, setMinCount] = useState(5);
  const [neighbours, setNeighbours] = useState(25);
  const [topN, setTopN] = useState(30);
  const sweep = useApiCall<AnomalySweepResponse>();

  // Stage B — per-word relations
  const [selectedWord, setSelectedWord] = useState<string | null>(null);
  const relations = useApiCall<AnomalyRelationResponse>();

  // Stage C — contextual incongruence (zoom in)
  const [keyword, setKeyword] = useState("");
  const [canonical, setCanonical] = useState("");
  const incong = useApiCall<IncongruenceResponse>();

  useEffect(() => {
    // Best-effort status probe on mount: on failure `bg` stays null and the
    // status row is simply not rendered.
    api.backgroundStatus().then(setBg).catch(() => {});
  }, []);

  /** Requests the background model load and stores the resulting status. */
  async function loadBackground() {
    setBgLoading(true);
    setBgError("");
    try {
      setBg(await api.backgroundLoad());
    } catch {
      setBgError("Background model failed to load (network/disk). Anomaly detection needs it.");
    } finally {
      setBgLoading(false);
    }
  }

  /**
   * Runs the Stage A sweep with the current advanced settings.
   * Any previous Stage B selection is discarded because it referred to the
   * previous result set.
   */
  async function runSweep() {
    // Clamp to the declared bounds so a cleared field (stored as 0) or an
    // out-of-range typed value is never sent to the API.
    const minCountParam = clampInt(minCount, SWEEP_LIMITS.minCount);
    const neighboursParam = clampInt(neighbours, SWEEP_LIMITS.neighbours);
    const topNParam = clampInt(topN, SWEEP_LIMITS.topN);
    // Write the clamped values back so the inputs show what was actually used.
    setMinCount(minCountParam);
    setNeighbours(neighboursParam);
    setTopN(topNParam);

    setSelectedWord(null);
    relations.clear();
    const res = await sweep.run(() =>
      api.analyzeAnomalies({
        min_count: minCountParam,
        neighbours: neighboursParam,
        top_n: topNParam,
      }));
    // After a successful sweep, re-query the status if the badge still reads
    // "not loaded". NOTE(review): presumably the sweep can load the model
    // server-side — confirm against the backend.
    if (res && !bg?.ready) api.backgroundStatus().then(setBg).catch(() => {});
  }

  /** Selects a sweep row and fetches its relations for Stage B. */
  async function drillInto(word: string) {
    setSelectedWord(word);
    await relations.run(() => api.analyzeAnomalyRelations({ word, top_k: 15 }));
  }

  /**
   * Runs the Stage C incongruence query for `word` and scrolls to its panel.
   *
   * @param word  Keyword to analyse; also written into the keyword input.
   * @param gloss Optional canonical meaning. When provided (including `""`) it
   *              replaces the canonical-meaning input; when omitted the input
   *              is left untouched.
   */
  async function zoomIn(word: string, gloss?: string) {
    setKeyword(word);
    if (gloss !== undefined) setCanonical(gloss);
    // `||` on purpose: an empty or whitespace-only gloss means "no canonical
    // meaning", so it is omitted from the request rather than sent as text.
    const meaning = gloss?.trim() || undefined;
    await incong.run(() =>
      api.analyzeIncongruence({ keyword: word, canonical_meaning: meaning, top_k: 10 }));
    document.getElementById("zoom-section")?.scrollIntoView({ behavior: "smooth" });
  }

  /**
   * Single submit path for Stage C, shared by the Zoom button and the Enter
   * key in both inputs. Does nothing while a request is in flight or when the
   * keyword is blank, so Enter cannot bypass the button's disabled state.
   */
  function submitZoom() {
    const word = keyword.trim();
    if (!word || incong.loading) return;
    void zoomIn(word, canonical);
  }

  const bgReady = bg?.ready ?? false;

  return (
    <div>
      {/* Background model status */}
      <div className="panel">
        <h2>Anomalous Relations</h2>
        <p className="panel-desc">
          Find <strong>code-word candidates</strong>: common English words that behave uncommonly
          in this corpus. We contrast each word's neighbours in the corpus Word2Vec against a
          pretrained general-English model (GloVe). A relation is flagged when it is{" "}
          <em>strong here but weak/absent in normal English</em> — not merely "low similarity".
        </p>
        {bg && (
          <div className="flex-row" style={{ alignItems: "center", gap: 8 }}>
            <span
              className="badge"
              style={{
                background: `rgba(${bgReady ? "74, 222, 128" : "255, 170, 0"}, 0.15)`,
                color: bgReady ? "var(--ok)" : "var(--accent)",
              }}
            >
              {bg.model_name}: {bgReady ? `ready (${bg.vocab_size.toLocaleString()} words)` : "not loaded"}
            </span>
            {!bgReady && (
              <button className="btn" onClick={loadBackground} disabled={bgLoading}>
                {bgLoading ? <><span className="spinner" /> Downloading…</> : "Load background model"}
              </button>
            )}
          </div>
        )}
        {bgError && <div className="mt-2"><StatusMessage type="err" message={bgError} /></div>}
      </div>

      {/* Stage A — corpus sweep */}
      <div className="panel">
        <h3 style={{ marginTop: 0 }}>1 · Scan corpus for anomalous words</h3>
        <p className="panel-desc">
          Ranks words by neighbour-set divergence (z-scored across the vocabulary). Higher z = the
          word's corpus associations look more unlike general English.
        </p>
        <button className="advanced-toggle" onClick={() => setShowAdvanced(!showAdvanced)}>
          {showAdvanced ? "▾" : "▸"} Advanced Settings
        </button>
        {showAdvanced && (
          <div className="advanced-section">
            <div className="form-row">
              <div className="form-group" style={{ maxWidth: 130 }}>
                <label>Min corpus freq</label>
                <input
                  type="number"
                  value={minCount}
                  onChange={e => setMinCount(+e.target.value)}
                  min={SWEEP_LIMITS.minCount.min}
                  max={SWEEP_LIMITS.minCount.max}
                />
              </div>
              <div className="form-group" style={{ maxWidth: 130 }}>
                <label>Neighbours (k)</label>
                <input
                  type="number"
                  value={neighbours}
                  onChange={e => setNeighbours(+e.target.value)}
                  min={SWEEP_LIMITS.neighbours.min}
                  max={SWEEP_LIMITS.neighbours.max}
                />
              </div>
              <div className="form-group" style={{ maxWidth: 130 }}>
                <label>Top N results</label>
                <input
                  type="number"
                  value={topN}
                  onChange={e => setTopN(+e.target.value)}
                  min={SWEEP_LIMITS.topN.min}
                  max={SWEEP_LIMITS.topN.max}
                />
              </div>
            </div>
          </div>
        )}
        <button className="btn btn-primary" onClick={runSweep} disabled={sweep.loading} style={{ marginTop: 8 }}>
          {sweep.loading ? <><span className="spinner" /> Scanning…</> : "Scan corpus"}
        </button>
        {sweep.error && <div className="mt-2"><StatusMessage type="err" message={sweep.error} /></div>}
        {sweep.data?.note && <div className="mt-2"><StatusMessage type="err" message={sweep.data.note} /></div>}
        {sweep.data && sweep.data.results.length > 0 && (
          <div className="mt-2">
            <div className="section-label">
              {sweep.data.results.length} flagged · shared vocab {sweep.data.vocab_size.toLocaleString()} ·
              mean shift {sweep.data.shift_mean}
            </div>
            <table className="data-table">
              <thead>
                <tr>
                  <th>Word</th><th>Freq</th><th>z</th>
                  <th>Surprising neighbours (here, not normal)</th><th></th>
                </tr>
              </thead>
              <tbody>
                {sweep.data.results.map((r) => (
                  <tr
                    key={r.word}
                    onClick={() => drillInto(r.word)}
                    style={{ cursor: "pointer", background: selectedWord === r.word ? "rgba(108,140,255,0.08)" : undefined }}
                  >
                    <td style={{ fontWeight: 600 }}>{r.word}</td>
                    <td>{r.corpus_frequency}</td>
                    <td>
                      {/* z >= 2 gets the error colour, lower scores the accent colour */}
                      <span className="badge" style={{
                        background: `rgba(${r.z_score >= 2 ? "255,107,107" : "108,140,255"},0.15)`,
                        color: r.z_score >= 2 ? "var(--err)" : "var(--accent)",
                      }}>{r.z_score.toFixed(2)}</span>
                    </td>
                    <td style={{ fontSize: "0.85rem" }}>{r.surprising_neighbors.join(", ") || "—"}</td>
                    <td style={{ color: "var(--accent)", fontSize: "0.8rem" }}>inspect →</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        )}
      </div>

      {/* Stage B — per-word relations drilldown */}
      {selectedWord && (
        <div className="panel">
          <h3 style={{ marginTop: 0 }}>2 · Relations for "{selectedWord}"</h3>
          {relations.loading && <StatusMessage type="loading" message="Computing relations…" />}
          {relations.error && <StatusMessage type="err" message={relations.error} />}
          {relations.data && !relations.data.found && (
            <StatusMessage type="err" message={`"${selectedWord}" — ${relations.data.reason}.`} />
          )}
          {relations.data?.found && (
            <>
              <p className="panel-desc">
                Surprise = (how strongly tied here) − (how strongly tied in general English), each
                standardised within its own space. High surprise = the suspicious pairing.
              </p>
              <table className="data-table">
                <thead>
                  <tr><th>Neighbour</th><th>Surprise</th><th>Corpus sim</th><th>Normal-English sim</th></tr>
                </thead>
                <tbody>
                  {relations.data.relations.map((rel) => (
                    <tr key={rel.neighbor}>
                      <td style={{ fontWeight: 600 }}>{rel.neighbor}</td>
                      <td><ScoreBar score={rel.surprise} max={4} /></td>
                      <td>{rel.corpus_sim.toFixed(3)}</td>
                      <td>{rel.background_sim.toFixed(3)}</td>
                    </tr>
                  ))}
                </tbody>
              </table>
              {/* An empty array is truthy, so check the length to avoid a heading with nothing under it. */}
              {relations.data.normal_neighbors && relations.data.normal_neighbors.length > 0 && (
                <div className="mt-2">
                  <div className="section-label">For contrast — "{selectedWord}" normally relates to:</div>
                  <div style={{ fontSize: "0.85rem", color: "var(--muted)" }}>
                    {relations.data.normal_neighbors.map(n => n.neighbor).join(", ")}
                  </div>
                </div>
              )}
              {/* Passing "" clears any canonical meaning left over from a previous zoom. */}
              <button
                className="btn btn-primary mt-2"
                disabled={incong.loading}
                onClick={() => zoomIn(selectedWord, "")}
              >
                Zoom in on occurrences →
              </button>
            </>
          )}
        </div>
      )}

      {/* Stage C — contextual incongruence */}
      <div className="panel" id="zoom-section">
        <h3 style={{ marginTop: 0 }}>3 · Zoom in — incongruent occurrences</h3>
        <p className="panel-desc">
          Uses the transformer to rank each occurrence of a keyword by how unlike its norm it is.
          Leave the meaning blank to compare against the keyword's <em>typical</em> usage in this
          corpus, or supply a dictionary meaning (e.g. "pizza, an Italian food") to flag usages that
          drift from it. Highest-incongruence chunks are the candidate coded usages.
        </p>
        <div className="form-row">
          <div className="form-group">
            <label>Keyword</label>
            <input value={keyword} onChange={e => setKeyword(e.target.value)}
              onKeyDown={e => { if (e.key === "Enter") submitZoom(); }}
              placeholder="e.g. pizza" />
          </div>
          <div className="form-group" style={{ flex: 2 }}>
            <label>Canonical meaning (optional)</label>
            <input value={canonical} onChange={e => setCanonical(e.target.value)}
              onKeyDown={e => { if (e.key === "Enter") submitZoom(); }}
              placeholder="leave blank to use corpus-typical usage" />
          </div>
          <div className="form-group form-group-sm">
            <label>&nbsp;</label>
            <button className="btn btn-primary" disabled={incong.loading || !keyword.trim()}
              onClick={submitZoom}>
              {incong.loading ? "…" : "Zoom"}
            </button>
          </div>
        </div>
        {incong.error && <StatusMessage type="err" message={incong.error} />}
        {incong.data && incong.data.total_occurrences === 0 && (
          <StatusMessage type="err" message={`No occurrences of "${incong.data.keyword}" found.`} />
        )}
        {incong.data && incong.data.occurrences.length > 0 && (
          <div className="mt-2">
            <div className="section-label">
              {incong.data.total_occurrences} occurrences · reference: {incong.data.reference} ·
              median incongruence {incong.data.median_incongruence}
            </div>
            <div className="flex-col gap-3">
              {incong.data.occurrences.map((occ, i) => (
                <div key={i} className="result-card">
                  <div className="result-header">
                    <span className="context-snippet-source">{occ.doc_id} · chunk {occ.chunk_index}</span>
                    <span className="badge" style={{
                      background: "rgba(255,107,107,0.15)", color: "var(--err)",
                    }}>incongruence {occ.incongruence.toFixed(3)}</span>
                  </div>
                  <div className="context-snippet mt-2">{occ.snippet}</div>
                  {occ.entities.length > 0 && (
                    <div className="mt-2">
                      <span className="section-label">Co-occurring: </span>
                      {occ.entities.map((e, j) => (
                        <span key={j} className="badge" style={{ marginRight: 4 }}>{e}</span>
                      ))}
                    </div>
                  )}
                </div>
              ))}
            </div>
          </div>
        )}
      </div>
    </div>
  );
}