/*
 * AnomalyPanel: React panel that drives a three-stage "anomalous relations" workflow.
 *
 * State and data flow, as visible in this file:
 *   - Background model: api.backgroundStatus() is requested from an effect with an
 *     empty dependency list and the result is stored in `bg`; a rejection there is
 *     deliberately ignored. loadBackground() calls api.backgroundLoad(), toggles
 *     `bgLoading` around the call, and reports a failure through `bgError`.
 *     `bgReady` is `bg?.ready ?? false`.
 *   - Stage A, runSweep(): clears the Stage B selection (selectedWord and
 *     relations.clear()), then runs api.analyzeAnomalies with min_count, neighbours
 *     and top_n taken from component state. If that run returns a truthy value while
 *     `bg` is not marked ready, the background status is fetched again.
 *   - Stage B, drillInto(word): stores the word as `selectedWord` and runs
 *     api.analyzeAnomalyRelations with top_k: 15.
 *   - Stage C, zoomIn(word, gloss?): stores the keyword (and the gloss when one is
 *     passed), runs api.analyzeIncongruence with top_k: 10, then scrolls the element
 *     with id "zoom-section" into view if it exists. An empty gloss is sent as
 *     `undefined` (`gloss || undefined`), so a blank meaning is omitted from the request.
 *
 * NOTE(review): this copy of the file looks damaged. It appears that everything
 * between an opening and a closing angle bracket was stripped, which would explain
 * the missing JSX element tags below and the missing generic arguments on
 * useState/useApiCall (the four types imported from "../types" are otherwise unused).
 * The setup code is also collapsed onto two physical lines, so the "//" stage comments
 * swallow the statements that follow them and one string literal is split across a
 * line break. Restore from version control before relying on this file.
 *
 * NOTE(review): the numeric inputs convert with unary plus (`+e.target.value`), which
 * yields 0 for an empty field and NaN for non-numeric text. Confirm the API tolerates
 * that or guard it.
 */
import { useState, useEffect } from "react"; import { api } from "../api"; import type { BackgroundStatus, AnomalySweepResponse, AnomalyRelationResponse, IncongruenceResponse, } from "../types"; import { useApiCall } from "../hooks/useApiCall"; import ScoreBar from "./ScoreBar"; import StatusMessage from "./StatusMessage"; export default function AnomalyPanel() { const [bg, setBg] = useState(null); const [bgLoading, setBgLoading] = useState(false); const [bgError, setBgError] = useState(""); // Stage A — corpus sweep const [showAdvanced, setShowAdvanced] = useState(false); const [minCount, setMinCount] = useState(5); const [neighbours, setNeighbours] = useState(25); const [topN, setTopN] = useState(30); const sweep = useApiCall(); // Stage B — per-word relations const [selectedWord, setSelectedWord] = useState(null); const relations = useApiCall(); // Stage C — contextual incongruence (zoom in) const [keyword, setKeyword] = useState(""); const [canonical, setCanonical] = useState(""); const incong = useApiCall(); useEffect(() => { api.backgroundStatus().then(setBg).catch(() => {}); }, []); async function loadBackground() { setBgLoading(true); setBgError(""); try { setBg(await api.backgroundLoad()); } catch { setBgError("Background model failed to load (network/disk). 
Anomaly detection needs it."); } finally { setBgLoading(false); } } async function runSweep() { setSelectedWord(null); relations.clear(); const res = await sweep.run(() => api.analyzeAnomalies({ min_count: minCount, neighbours, top_n: topN })); if (res && !bg?.ready) api.backgroundStatus().then(setBg).catch(() => {}); } async function drillInto(word: string) { setSelectedWord(word); await relations.run(() => api.analyzeAnomalyRelations({ word, top_k: 15 })); } async function zoomIn(word: string, gloss?: string) { setKeyword(word); if (gloss !== undefined) setCanonical(gloss); await incong.run(() => api.analyzeIncongruence({ keyword: word, canonical_meaning: gloss || undefined, top_k: 10 })); document.getElementById("zoom-section")?.scrollIntoView({ behavior: "smooth" }); } const bgReady = bg?.ready ?? false; return (
{/* Background model status */}

Anomalous Relations

Find code-word candidates: common English words that behave uncommonly in this corpus. We contrast each word's neighbours in the corpus Word2Vec against a pretrained general-English model (GloVe). A relation is flagged when it is{" "} strong here but weak/absent in normal English — not merely "low similarity".

{bg && (
{bg.model_name}: {bgReady ? `ready (${bg.vocab_size.toLocaleString()} words)` : "not loaded"} {!bgReady && ( )}
)} {bgError &&
}
{/* Stage A — corpus sweep */}

1 · Scan corpus for anomalous words

Ranks words by neighbour-set divergence (z-scored across the vocabulary). Higher z = the word's corpus associations look more unlike general English.

{showAdvanced && (
setMinCount(+e.target.value)} min={1} max={1000} />
setNeighbours(+e.target.value)} min={5} max={100} />
setTopN(+e.target.value)} min={1} max={200} />
)} {sweep.error &&
} {sweep.data?.note &&
} {sweep.data && sweep.data.results.length > 0 && (
{sweep.data.results.length} flagged · shared vocab {sweep.data.vocab_size.toLocaleString()} · mean shift {sweep.data.shift_mean}
{sweep.data.results.map((r) => ( drillInto(r.word)} style={{ cursor: "pointer", background: selectedWord === r.word ? "rgba(108,140,255,0.08)" : undefined }} > ))}
WordFreqz Surprising neighbours (here, not normal)
{r.word} {r.corpus_frequency} = 2 ? "255,107,107" : "108,140,255"},0.15)`, color: r.z_score >= 2 ? "var(--err)" : "var(--accent)", }}>{r.z_score.toFixed(2)} {r.surprising_neighbors.join(", ") || "—"} inspect →
)}
{/* Stage B — per-word relations drilldown */} {selectedWord && (

2 · Relations for "{selectedWord}"

{relations.loading && } {relations.error && } {relations.data && !relations.data.found && ( )} {relations.data?.found && ( <>

Surprise = (how strongly tied here) − (how strongly tied in general English), each standardised within its own space. High surprise = the suspicious pairing.

{relations.data.relations.map((rel) => ( ))}
NeighbourSurpriseCorpus simNormal-English sim
{rel.neighbor} {rel.corpus_sim.toFixed(3)} {rel.background_sim.toFixed(3)}
{relations.data.normal_neighbors && (
For contrast — "{selectedWord}" normally relates to:
{relations.data.normal_neighbors.map(n => n.neighbor).join(", ")}
)} )}
)} {/* Stage C — contextual incongruence */}

3 · Zoom in — incongruent occurrences

Uses the transformer to rank each occurrence of a keyword by how unlike its norm it is. Leave the meaning blank to compare against the keyword's typical usage in this corpus, or supply a dictionary meaning (e.g. "pizza, an Italian food") to flag usages that drift from it. Highest-incongruence chunks are the candidate coded usages.

setKeyword(e.target.value)} onKeyDown={e => e.key === "Enter" && keyword.trim() && zoomIn(keyword.trim(), canonical)} placeholder="e.g. pizza" />
setCanonical(e.target.value)} onKeyDown={e => e.key === "Enter" && keyword.trim() && zoomIn(keyword.trim(), canonical)} placeholder="leave blank to use corpus-typical usage" />
{incong.error && } {incong.data && incong.data.total_occurrences === 0 && ( )} {incong.data && incong.data.occurrences.length > 0 && (
{incong.data.total_occurrences} occurrences · reference: {incong.data.reference} · median incongruence {incong.data.median_incongruence}
{incong.data.occurrences.map((occ, i) => (
{occ.doc_id} · chunk {occ.chunk_index} incongruence {occ.incongruence.toFixed(3)}
{occ.snippet}
{occ.entities.length > 0 && (
Co-occurring: {occ.entities.map((e, j) => ( {e} ))}
)}
))}
)}
); }