| import { useState, useEffect } from "react"; |
| import { api } from "../api"; |
| import type { |
| BackgroundStatus, AnomalySweepResponse, AnomalyRelationResponse, IncongruenceResponse, |
| } from "../types"; |
| import { useApiCall } from "../hooks/useApiCall"; |
| import ScoreBar from "./ScoreBar"; |
| import StatusMessage from "./StatusMessage"; |
|
|
/**
 * Inclusive bounds for the sweep's numeric settings. Shared by the inputs'
 * `min`/`max` attributes and by the submit-time clamp so the two cannot drift.
 */
const SWEEP_LIMITS = {
  minCount: { min: 1, max: 1000 },
  neighbours: { min: 5, max: 100 },
  topN: { min: 1, max: 200 },
} as const;

/**
 * Coerces a numeric form value to an integer inside `[min, max]`.
 *
 * The `min`/`max` attributes on a number input do not stop a user from typing
 * an out-of-range value, and a cleared field is stored as `0` by the `+value`
 * coercion used in the change handlers, so the value is sanitised before it is
 * sent to the API.
 *
 * @param value Raw number taken from component state.
 * @param min Smallest accepted value (also used when `value` is `NaN`).
 * @param max Largest accepted value.
 * @returns `value` rounded to the nearest integer and clamped to the range.
 */
function clampInt(value: number, min: number, max: number): number {
  if (Number.isNaN(value)) return min;
  return Math.min(max, Math.max(min, Math.round(value)));
}

/**
 * Panel for hunting code-word candidates in the corpus.
 *
 * Renders four stacked sections:
 *  - background-model status, with a button to load the model when it is not ready;
 *  - step 1, a corpus sweep that lists flagged words with their z-scores;
 *  - step 2, the relation table for the word selected from the sweep results;
 *  - step 3, a keyword "zoom" that lists the most incongruent occurrences.
 */
export default function AnomalyPanel() {
  // Background model status.
  const [bg, setBg] = useState<BackgroundStatus | null>(null);
  const [bgLoading, setBgLoading] = useState(false);
  const [bgError, setBgError] = useState("");

  // Step 1: corpus sweep settings and request state.
  const [showAdvanced, setShowAdvanced] = useState(false);
  const [minCount, setMinCount] = useState(5);
  const [neighbours, setNeighbours] = useState(25);
  const [topN, setTopN] = useState(30);
  const sweep = useApiCall<AnomalySweepResponse>();

  // Step 2: relations for the word picked from the sweep table.
  const [selectedWord, setSelectedWord] = useState<string | null>(null);
  const relations = useApiCall<AnomalyRelationResponse>();

  // Step 3: incongruent occurrences of a keyword.
  const [keyword, setKeyword] = useState("");
  const [canonical, setCanonical] = useState("");
  const incong = useApiCall<IncongruenceResponse>();

  // Fetch the background status once on mount. Best-effort: on failure `bg`
  // stays null and the status row is simply not rendered.
  useEffect(() => {
    api.backgroundStatus().then(setBg).catch(() => {});
  }, []);

  /** Asks the API to load the background model and stores the returned status. */
  async function loadBackground() {
    setBgLoading(true);
    setBgError("");
    try {
      setBg(await api.backgroundLoad());
    } catch {
      setBgError("Background model failed to load (network/disk). Anomaly detection needs it.");
    } finally {
      setBgLoading(false);
    }
  }

  /**
   * Runs the corpus sweep with the current settings, resetting any previous
   * drill-down first.
   */
  async function runSweep() {
    // Sanitise at submit time rather than on every keystroke so the user can
    // still type intermediate values (e.g. "1" on the way to "15").
    const safeMinCount = clampInt(minCount, SWEEP_LIMITS.minCount.min, SWEEP_LIMITS.minCount.max);
    const safeNeighbours = clampInt(neighbours, SWEEP_LIMITS.neighbours.min, SWEEP_LIMITS.neighbours.max);
    const safeTopN = clampInt(topN, SWEEP_LIMITS.topN.min, SWEEP_LIMITS.topN.max);
    // Reflect the values actually sent back into the form.
    setMinCount(safeMinCount);
    setNeighbours(safeNeighbours);
    setTopN(safeTopN);

    setSelectedWord(null);
    relations.clear();
    const res = await sweep.run(() =>
      api.analyzeAnomalies({ min_count: safeMinCount, neighbours: safeNeighbours, top_n: safeTopN }));
    // After a successful sweep, re-query the status if the model was not known
    // to be ready, so the badge can update. Best-effort, like the mount fetch.
    if (res && !bg?.ready) api.backgroundStatus().then(setBg).catch(() => {});
  }

  /** Selects `word` and requests its relation table. */
  async function drillInto(word: string) {
    setSelectedWord(word);
    await relations.run(() => api.analyzeAnomalyRelations({ word, top_k: 15 }));
  }

  /**
   * Requests the incongruent occurrences of `word` and scrolls to the zoom section.
   *
   * @param word Keyword to analyse; also written into the keyword field.
   * @param gloss Optional canonical meaning. When provided (even as `""`) it
   *   replaces the canonical-meaning field; when omitted the field is left alone.
   */
  async function zoomIn(word: string, gloss?: string) {
    setKeyword(word);
    if (gloss !== undefined) setCanonical(gloss);
    // `||` is deliberate: a blank or whitespace-only meaning is sent as "not provided".
    const canonicalMeaning = gloss?.trim() || undefined;
    await incong.run(() =>
      api.analyzeIncongruence({ keyword: word, canonical_meaning: canonicalMeaning, top_k: 10 }));
    document.getElementById("zoom-section")?.scrollIntoView({ behavior: "smooth" });
  }

  /**
   * Submits the zoom form. Used by the Zoom button and by Enter in either
   * field, so both paths apply the same "non-empty keyword, nothing in flight"
   * guard that the button's `disabled` state expresses.
   */
  function submitZoom() {
    const term = keyword.trim();
    if (!term || incong.loading) return;
    void zoomIn(term, canonical);
  }

  const bgReady = bg?.ready ?? false;

  return (
    <div>
      {/* Background model status */}
      <div className="panel">
        <h2>Anomalous Relations</h2>
        <p className="panel-desc">
          Find <strong>code-word candidates</strong>: common English words that behave uncommonly
          in this corpus. We contrast each word's neighbours in the corpus Word2Vec against a
          pretrained general-English model (GloVe). A relation is flagged when it is{" "}
          <em>strong here but weak/absent in normal English</em> — not merely "low similarity".
        </p>
        {bg && (
          <div className="flex-row" style={{ alignItems: "center", gap: 8 }}>
            <span
              className="badge"
              style={{
                background: `rgba(${bgReady ? "74, 222, 128" : "255, 170, 0"}, 0.15)`,
                color: bgReady ? "var(--ok)" : "var(--accent)",
              }}
            >
              {bg.model_name}: {bgReady ? `ready (${bg.vocab_size.toLocaleString()} words)` : "not loaded"}
            </span>
            {!bgReady && (
              <button className="btn" onClick={loadBackground} disabled={bgLoading}>
                {bgLoading ? <><span className="spinner" /> Downloading…</> : "Load background model"}
              </button>
            )}
          </div>
        )}
        {bgError && <div className="mt-2"><StatusMessage type="err" message={bgError} /></div>}
      </div>

      {/* Step 1: corpus sweep */}
      <div className="panel">
        <h3 style={{ marginTop: 0 }}>1 · Scan corpus for anomalous words</h3>
        <p className="panel-desc">
          Ranks words by neighbour-set divergence (z-scored across the vocabulary). Higher z = the
          word's corpus associations look more unlike general English.
        </p>

        <button className="advanced-toggle" onClick={() => setShowAdvanced(!showAdvanced)}>
          {showAdvanced ? "▾" : "▸"} Advanced Settings
        </button>
        {showAdvanced && (
          <div className="advanced-section">
            <div className="form-row">
              <div className="form-group" style={{ maxWidth: 130 }}>
                <label>Min corpus freq</label>
                <input
                  type="number"
                  value={minCount}
                  onChange={e => setMinCount(+e.target.value)}
                  min={SWEEP_LIMITS.minCount.min}
                  max={SWEEP_LIMITS.minCount.max}
                />
              </div>
              <div className="form-group" style={{ maxWidth: 130 }}>
                <label>Neighbours (k)</label>
                <input
                  type="number"
                  value={neighbours}
                  onChange={e => setNeighbours(+e.target.value)}
                  min={SWEEP_LIMITS.neighbours.min}
                  max={SWEEP_LIMITS.neighbours.max}
                />
              </div>
              <div className="form-group" style={{ maxWidth: 130 }}>
                <label>Top N results</label>
                <input
                  type="number"
                  value={topN}
                  onChange={e => setTopN(+e.target.value)}
                  min={SWEEP_LIMITS.topN.min}
                  max={SWEEP_LIMITS.topN.max}
                />
              </div>
            </div>
          </div>
        )}

        <button className="btn btn-primary" onClick={runSweep} disabled={sweep.loading} style={{ marginTop: 8 }}>
          {sweep.loading ? <><span className="spinner" /> Scanning…</> : "Scan corpus"}
        </button>

        {sweep.error && <div className="mt-2"><StatusMessage type="err" message={sweep.error} /></div>}
        {sweep.data?.note && <div className="mt-2"><StatusMessage type="err" message={sweep.data.note} /></div>}

        {sweep.data && sweep.data.results.length > 0 && (
          <div className="mt-2">
            <div className="section-label">
              {sweep.data.results.length} flagged · shared vocab {sweep.data.vocab_size.toLocaleString()} ·
              mean shift {sweep.data.shift_mean}
            </div>
            <table className="data-table">
              <thead>
                <tr>
                  <th>Word</th><th>Freq</th><th>z</th>
                  <th>Surprising neighbours (here, not normal)</th><th></th>
                </tr>
              </thead>
              <tbody>
                {sweep.data.results.map((r) => (
                  <tr
                    key={r.word}
                    // Rows act as buttons: make them focusable and let
                    // Enter/Space trigger the same drill-down as a click.
                    tabIndex={0}
                    onClick={() => drillInto(r.word)}
                    onKeyDown={(e) => {
                      if (e.key === "Enter" || e.key === " ") {
                        e.preventDefault(); // stop Space from scrolling the page
                        void drillInto(r.word);
                      }
                    }}
                    style={{ cursor: "pointer", background: selectedWord === r.word ? "rgba(108,140,255,0.08)" : undefined }}
                  >
                    <td style={{ fontWeight: 600 }}>{r.word}</td>
                    <td>{r.corpus_frequency}</td>
                    <td>
                      {/* z >= 2 is highlighted in the error colour */}
                      <span className="badge" style={{
                        background: `rgba(${r.z_score >= 2 ? "255,107,107" : "108,140,255"},0.15)`,
                        color: r.z_score >= 2 ? "var(--err)" : "var(--accent)",
                      }}>{r.z_score.toFixed(2)}</span>
                    </td>
                    <td style={{ fontSize: "0.85rem" }}>{r.surprising_neighbors.join(", ") || "—"}</td>
                    <td style={{ color: "var(--accent)", fontSize: "0.8rem" }}>inspect →</td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        )}
      </div>

      {/* Step 2: relations for the selected word */}
      {selectedWord && (
        <div className="panel">
          <h3 style={{ marginTop: 0 }}>2 · Relations for "{selectedWord}"</h3>
          {relations.loading && <StatusMessage type="loading" message="Computing relations…" />}
          {relations.error && <StatusMessage type="err" message={relations.error} />}
          {relations.data && !relations.data.found && (
            <StatusMessage type="err" message={`"${selectedWord}" — ${relations.data.reason}.`} />
          )}
          {relations.data?.found && (
            <>
              <p className="panel-desc">
                Surprise = (how strongly tied here) − (how strongly tied in general English), each
                standardised within its own space. High surprise = the suspicious pairing.
              </p>
              <table className="data-table">
                <thead>
                  <tr><th>Neighbour</th><th>Surprise</th><th>Corpus sim</th><th>Normal-English sim</th></tr>
                </thead>
                <tbody>
                  {relations.data.relations.map((rel) => (
                    <tr key={rel.neighbor}>
                      <td style={{ fontWeight: 600 }}>{rel.neighbor}</td>
                      <td><ScoreBar score={rel.surprise} max={4} /></td>
                      <td>{rel.corpus_sim.toFixed(3)}</td>
                      <td>{rel.background_sim.toFixed(3)}</td>
                    </tr>
                  ))}
                </tbody>
              </table>
              {/* Skip the contrast block when the list is empty, not only when it is missing. */}
              {relations.data.normal_neighbors && relations.data.normal_neighbors.length > 0 && (
                <div className="mt-2">
                  <div className="section-label">For contrast — "{selectedWord}" normally relates to:</div>
                  <div style={{ fontSize: "0.85rem", color: "var(--muted)" }}>
                    {relations.data.normal_neighbors.map(n => n.neighbor).join(", ")}
                  </div>
                </div>
              )}
              {/* Passing "" clears the canonical-meaning field for the new keyword. */}
              <button className="btn btn-primary mt-2" onClick={() => zoomIn(selectedWord, "")}>
                Zoom in on occurrences →
              </button>
            </>
          )}
        </div>
      )}

      {/* Step 3: zoom in on a keyword's occurrences */}
      <div className="panel" id="zoom-section">
        <h3 style={{ marginTop: 0 }}>3 · Zoom in — incongruent occurrences</h3>
        <p className="panel-desc">
          Uses the transformer to rank each occurrence of a keyword by how unlike its norm it is.
          Leave the meaning blank to compare against the keyword's <em>typical</em> usage in this
          corpus, or supply a dictionary meaning (e.g. "pizza, an Italian food") to flag usages that
          drift from it. Highest-incongruence chunks are the candidate coded usages.
        </p>
        <div className="form-row">
          <div className="form-group">
            <label>Keyword</label>
            <input
              value={keyword}
              onChange={e => setKeyword(e.target.value)}
              onKeyDown={e => { if (e.key === "Enter") submitZoom(); }}
              placeholder="e.g. pizza"
            />
          </div>
          <div className="form-group" style={{ flex: 2 }}>
            <label>Canonical meaning (optional)</label>
            <input
              value={canonical}
              onChange={e => setCanonical(e.target.value)}
              onKeyDown={e => { if (e.key === "Enter") submitZoom(); }}
              placeholder="leave blank to use corpus-typical usage"
            />
          </div>
          <div className="form-group form-group-sm">
            {/* Spacer label so the button lines up with the labelled inputs. */}
            <label>&nbsp;</label>
            <button
              className="btn btn-primary"
              disabled={incong.loading || !keyword.trim()}
              onClick={submitZoom}
            >
              {incong.loading ? "…" : "Zoom"}
            </button>
          </div>
        </div>

        {incong.error && <StatusMessage type="err" message={incong.error} />}
        {incong.data && incong.data.total_occurrences === 0 && (
          <StatusMessage type="err" message={`No occurrences of "${incong.data.keyword}" found.`} />
        )}
        {incong.data && incong.data.occurrences.length > 0 && (
          <div className="mt-2">
            <div className="section-label">
              {incong.data.total_occurrences} occurrences · reference: {incong.data.reference} ·
              median incongruence {incong.data.median_incongruence}
            </div>
            <div className="flex-col gap-3">
              {incong.data.occurrences.map((occ, i) => (
                <div key={i} className="result-card">
                  <div className="result-header">
                    <span className="context-snippet-source">{occ.doc_id} · chunk {occ.chunk_index}</span>
                    <span className="badge" style={{
                      background: "rgba(255,107,107,0.15)", color: "var(--err)",
                    }}>incongruence {occ.incongruence.toFixed(3)}</span>
                  </div>
                  <div className="context-snippet mt-2">{occ.snippet}</div>
                  {occ.entities.length > 0 && (
                    <div className="mt-2">
                      <span className="section-label">Co-occurring: </span>
                      {occ.entities.map((e, j) => (
                        <span key={j} className="badge" style={{ marginRight: 4 }}>{e}</span>
                      ))}
                    </div>
                  )}
                </div>
              ))}
            </div>
          </div>
        )}
      </div>
    </div>
  );
}
|
|