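// File-viewer page residue captured together with the source (not part of the module):
//   Jaaccaa's picture
//   Update src/App.jsx
//   1e270bf verified
//   Raw
//   History Blame Contribute Delete
//   42.6 kB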
import { useState, useEffect, useRef, Fragment } from "react";
// Base URL of the backend service. The pipeline POSTs JSON to
// `${API_BASE}/augment` and `${API_BASE}/filter`.
const API_BASE = "https://jaaccaa-data-augmentation.hf.space";
// CSS @import rule that loads the JetBrains Mono and Syne web fonts from Google Fonts.
// It is interpolated at the top of the <style> block rendered by App.
const FONTS = `@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;700&family=Syne:wght@400;600;700;800&display=swap');`;
// ── Demo Samples ──────────────────────────────────────────────────────────────
// A set of sentences from the PolEmo2.0 corpus reflecting a long-tail distribution.
// Each entry has:
//   id    – unique number; used as the React key and to highlight the selected sample.
//   text  – the Polish sentence that is sent to the backend as augmentation input.
//   label – sentiment class name ("pozytywna" | "negatywna" | "neutralna"), shown via ClassBadge.
//   count – sample count displayed next to the sentence; entries with count < 15 are
//           flagged as long-tail (minority) both in the list and in the pipeline log.
const SAMPLE_SENTENCES = [
{ id: 1, text: "Produkt jest bardzo dobry i polecam go wszystkim.", label: "pozytywna", count: 142 },
{ id: 2, text: "Obsługa klienta była fatalna i nieprofesjonalna.", label: "negatywna", count: 8 },
{ id: 3, text: "Dostawa przyszła na czas, jestem zadowolony.", label: "pozytywna", count: 134 },
{ id: 4, text: "Jakość wykonania pozostawia wiele do życzenia.", label: "negatywna", count: 11 },
{ id: 5, text: "Nie mam zdania na temat tego produktu.", label: "neutralna", count: 6 },
{ id: 6, text: "Cena jest adekwatna do jakości oferowanego towaru.", label: "neutralna", count: 9 },
];
// ── Augmentation Methods Definitions ──────────────────────────────────────────
// Registry of the selectable augmentation methods, keyed by method id.
// The key ("EDA" | "BT" | "LLM") is what gets sent as `method` in the /augment request
// and what `selectedMethod` holds. Per entry:
//   label       – display name on the method card and in the pipeline log.
//   color       – accent colour used for the selected card and the result panel.
//   lib         – library/engine name shown on the card and logged as "Inference engine".
//   description – short explanatory text rendered under the card title.
const AUG_METHODS = {
EDA: {
label: "EDA (Lexical Rules)",
color: "#4ade80",
lib: "NLPAug + HerBERT",
description: "Token-level perturbations: synonym replacement, random insertion, and deletion. Low computational overhead, high throughput.",
},
BT: {
label: "Back-Translation",
color: "#60a5fa",
lib: "deep-translator (Google)",
description: "Round-trip translation (PL → [EN, DE, CS] → PL). Leverages multilingual embeddings to break syntactic patterns and bypass pivot-language bias.",
},
LLM: {
label: "Generative LLM",
color: "#f472b6",
lib: "Groq Cloud (Llama 3)",
description: "Advanced paraphrasing based on prompt instructions for Large Language Models. Highest semantic quality powered by ultra-fast LPU inference.",
},
};
// ── Helper Components ─────────────────────────────────────────────────────────
function MetricBar({ label, value, color, unit = "%" }) {
return (
<div style={{ marginBottom: 10 }}>
<div style={{ display: "flex", justifyContent: "space-between", marginBottom: 4 }}>
<span style={{ fontSize: 11, color: "#94a3b8", fontFamily: "JetBrains Mono" }}>{label}</span>
<span style={{ fontSize: 12, color, fontFamily: "JetBrains Mono", fontWeight: 700 }}>
{typeof value === "number" ? value.toFixed(1) : value}{unit}
</span>
</div>
<div style={{ height: 4, background: "#1e293b", borderRadius: 2 }}>
<div style={{ height: "100%", width: `${Math.min(value, 100)}%`, background: color, borderRadius: 2, transition: "width 1s ease" }} />
</div>
</div>
);
}
function ClassBadge({ label }) {
const colors = { pozytywna: "#4ade80", negatywna: "#f87171", neutralna: "#fbbf24" };
return (
<span style={{
fontSize: 10, fontFamily: "JetBrains Mono", fontWeight: 700,
color: colors[label] || "#94a3b8", background: (colors[label] || "#94a3b8") + "22",
border: `1px solid ${(colors[label] || "#94a3b8")}44`,
padding: "2px 8px", borderRadius: 20, letterSpacing: 1, textTransform: "uppercase"
}}>{label}</span>
);
}
function StepBadge({ step, active, done }) {
return (
<div style={{
width: 32, height: 32, borderRadius: "50%",
display: "flex", alignItems: "center", justifyContent: "center",
fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 13,
background: done ? "#4ade8033" : active ? "#f472b633" : "#1e293b",
border: `2px solid ${done ? "#4ade80" : active ? "#f472b6" : "#334155"}`,
color: done ? "#4ade80" : active ? "#f472b6" : "#475569",
transition: "all 0.4s ease",
flexShrink: 0,
}}>{done ? "✓" : step}</div>
);
}
// ── Main Application ──────────────────────────────────────────────────────────
export default function App() {
const [activeTab, setActiveTab] = useState("pipeline");
const [pipelineStep, setPipelineStep] = useState(0);
const [selectedSentence, setSelectedSentence] = useState(SAMPLE_SENTENCES[1]);
const [selectedMethod, setSelectedMethod] = useState("LLM");
const [augmented, setAugmented] = useState(null);
const [similarity, setSimilarity] = useState(null);
const [filtered, setFiltered] = useState(null);
const [logs, setLogs] = useState([]);
const [metrics, setMetrics] = useState(null);
const [running, setRunning] = useState(false);
const [intermediate, setIntermediate] = useState(null);
// Hyperparameters
const [selectedPivot, setSelectedPivot] = useState("en");
const [edaIntensity, setEdaIntensity] = useState(0.15);
const [filterThreshold, setFilterThreshold] = useState(0.80);
const logRef = useRef(null);
useEffect(() => {
if (logRef.current) logRef.current.scrollTop = logRef.current.scrollHeight;
}, [logs]);
/**
 * Appends one line to the system log.
 *
 * @param {string} msg - Text of the log line.
 * @param {"info"|"success"|"warn"|"error"|"accent"} [type="info"] - Severity; selects the
 *   line colour. Unknown types fall back to the "info" colour.
 */
const addLog = (msg, type = "info") => {
  const palette = { info: "#94a3b8", success: "#4ade80", warn: "#fbbf24", error: "#f87171", accent: "#f472b6" };
  // Build the entry (including its UTC HH:MM:SS timestamp) before calling the setter so the
  // updater below stays pure; React may invoke updater functions more than once.
  const entry = {
    msg,
    color: palette[type] || palette.info,
    ts: new Date().toISOString().slice(11, 19),
  };
  setLogs((prev) => [...prev, entry]);
};
// AbortController of the pipeline run currently in flight; null while idle.
const abortRef = useRef(null);
// Abort an in-flight run when App unmounts so timers and requests do not outlive it.
useEffect(() => () => {
  if (abortRef.current) abortRef.current.abort();
}, []);

/**
 * Resolves after `ms` milliseconds.
 * When an AbortSignal is supplied and it aborts first, rejects with an "AbortError"
 * DOMException (the same error name fetch uses) and clears the timer.
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>}
 */
const sleep = (ms, signal) =>
  new Promise((resolve, reject) => {
    if (signal && signal.aborted) {
      reject(new DOMException("Pipeline run aborted", "AbortError"));
      return;
    }
    const onAbort = () => {
      clearTimeout(timer);
      reject(new DOMException("Pipeline run aborted", "AbortError"));
    };
    const timer = setTimeout(() => {
      if (signal) signal.removeEventListener("abort", onAbort);
      resolve();
    }, ms);
    if (signal) signal.addEventListener("abort", onAbort, { once: true });
  });

// True for the error raised when a run is cancelled through its AbortSignal.
const isAbortError = (error) => Boolean(error) && error.name === "AbortError";

// Human-readable text for anything that can be thrown.
const describeError = (error) => (error && error.message ? error.message : String(error));

// POSTs `payload` as JSON to `${API_BASE}${path}` and returns the parsed JSON body.
// Throws on any non-2xx status so callers never parse an error page as data.
const postJson = async (path, payload, signal) => {
  const res = await fetch(`${API_BASE}${path}`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
    signal,
  });
  if (!res.ok) throw new Error(`Status ${res.status}`);
  return res.json();
};

/**
 * Runs the four-stage demo pipeline for the current selection.
 *
 * Stages 2 (augmentation) and 3 (semantic filter) call the backend; stages 1 and 4 are
 * simulated on the client with timed log lines and randomised metrics.
 * The run is cancellable: reset() aborts the run's signal, which rejects the pending
 * sleep/fetch, and the run then stops without touching state again.
 */
const runPipeline = async () => {
  if (running) return;

  const controller = new AbortController();
  abortRef.current = controller;
  const { signal } = controller;
  const pause = (ms) => sleep(ms, signal);

  setRunning(true);
  setAugmented(null); setSimilarity(null); setFiltered(null); setMetrics(null); setIntermediate(null);
  setLogs([]);

  try {
    // 1: Data Loading
    setPipelineStep(1);
    addLog("► Initializing data pipeline...", "accent");
    await pause(600);
    addLog(` Corpus scanned. Detected ${SAMPLE_SENTENCES.length} defined classes.`, "info");
    const minority = SAMPLE_SENTENCES.filter((s) => s.count < 15);
    addLog(` Imbalance flag: ${minority.length} classes identified as long-tail.`, "warn");
    addLog(` Isolating sample from class: [${selectedSentence.label.toUpperCase()}]`, "success");
    await pause(500);

    // 2: Paraphrase Generation (API Call)
    setPipelineStep(2);
    addLog(`► Executing module: ${AUG_METHODS[selectedMethod].label}`, "accent");
    await pause(400);
    addLog(` Inference engine: ${AUG_METHODS[selectedMethod].lib}`, "info");
    let aug;
    try {
      const dataAug = await postJson("/augment", {
        text: selectedSentence.text,
        method: selectedMethod,
        pivot_lang: selectedPivot,
        eda_p: edaIntensity,
      }, signal);
      if (typeof dataAug.augmented !== "string" || dataAug.augmented.trim() === "") {
        throw new Error("Response did not contain an augmented sentence");
      }
      aug = dataAug.augmented;
      if (selectedMethod === "BT" && dataAug.intermediate) {
        // Fall back to the requested pivot if the server does not echo it back.
        const pivot = String(dataAug.pivot_lang || selectedPivot).toUpperCase();
        setIntermediate({ lang: pivot, text: dataAug.intermediate });
        addLog(` Pivot vector [${pivot}]: Generated successfully.`, "info");
      }
    } catch (error) {
      if (isAbortError(error)) throw error; // cancellation is handled by the outer catch
      addLog(` API CHANNEL FAILURE: No connection to base FastAPI server.`, "error");
      addLog(` Details: ${describeError(error)}`, "error");
      setRunning(false);
      return;
    }
    setAugmented(aug);
    addLog(` Sentence synthesis completed.`, "success");
    await pause(400);

    // 3: S-BERT Filtration (API Call)
    setPipelineStep(3);
    addLog("► Calculating vector distance (Sentence-BERT)...", "accent");
    let sim;
    let pass;
    try {
      const filterData = await postJson("/filter", {
        original: selectedSentence.text,
        augmented: aug,
        threshold: filterThreshold,
      }, signal);
      // The result panels call number methods on the similarity, so reject anything else here.
      if (!Number.isFinite(filterData.similarity)) {
        throw new Error("Response did not contain a numeric similarity");
      }
      sim = filterData.similarity;
      pass = Boolean(filterData.passed);
    } catch (error) {
      if (isAbortError(error)) throw error;
      addLog(` FILTER FAILURE: No response from microservice.`, "error");
      addLog(` Details: ${describeError(error)}`, "error");
      setRunning(false);
      return;
    }
    setSimilarity(sim);
    setFiltered(pass);
    // Round for display: e.g. 0.55 * 100 is 55.00000000000001 in floating point.
    const requiredPct = Math.round(filterThreshold * 100);
    if (pass) {
      addLog(` Semantic alignment: ${(sim * 100).toFixed(1)}% (Required: ${requiredPct}%) → ACCEPTED ✓`, "success");
    } else {
      addLog(` Semantic alignment: ${(sim * 100).toFixed(1)}% (Required: ${requiredPct}%) → REJECTED ✗`, "error");
      addLog(" Semantic drift detected. Sample flushed from buffer.", "warn");
    }
    await pause(500);

    // 4: Training Module (simulated: fixed log lines and randomised metric gains)
    setPipelineStep(4);
    addLog("► Initializing Fine-Tuning process for base model...", "accent");
    await pause(400);
    addLog(" Architecture: allegro/herbert-base-cased", "info");
    addLog(" Optimizer: AdamW, Learning Rate (LR): 2e-5", "info");
    await pause(900);
    addLog(" Epoch 1/3 — Loss: 0.487", "info");
    await pause(500);
    addLog(" Epoch 2/3 — Loss: 0.312", "info");
    await pause(500);
    addLog(" Epoch 3/3 — Loss: 0.241", "info");
    await pause(400);
    const baseF1 = 61.2;
    const augF1 = pass ? 61.2 + 4 + Math.random() * 5 : 61.2 + 1.5 + Math.random() * 2;
    setMetrics({
      baseF1,
      augF1,
      baseAcc: 74.1,
      augAcc: pass ? 74.1 + 3.5 + Math.random() * 3 : 74.1 + 1 + Math.random() * 2,
      sss: sim * 100,
      samplesAdded: pass ? 1 : 0,
    });
    addLog(` Baseline Evaluation (Macro-F1): ${baseF1.toFixed(1)}%`, "info");
    addLog(` Augmented Evaluation (Macro-F1): ${augF1.toFixed(1)}% (+${(augF1 - baseF1).toFixed(1)}pp) ✓`, "success");
    setPipelineStep(5);
    addLog("■ Stream processing completed.", "accent");
    setRunning(false);
  } catch (error) {
    // Cancelled by reset() or unmount: the idle state is already restored, so stop quietly.
    if (isAbortError(error)) return;
    // Anything else is unexpected; surface it and release the "running" lock.
    addLog(` PIPELINE FAILURE: ${describeError(error)}`, "error");
    setRunning(false);
  } finally {
    // Only clear the slot if it still belongs to this run (a newer run may own it by now).
    if (abortRef.current === controller) abortRef.current = null;
  }
};

/**
 * Cancels any in-flight run and returns every pipeline output to its idle state.
 * Selection and hyperparameters (sentence, method, pivot, EDA rate, threshold) are kept.
 */
const reset = () => {
  if (abortRef.current) {
    abortRef.current.abort();
    abortRef.current = null;
  }
  setPipelineStep(0); setAugmented(null); setSimilarity(null);
  setFiltered(null); setMetrics(null); setLogs([]); setRunning(false);
  setIntermediate(null);
};
// Stage descriptors for the stepper at the top of the Control Panel tab.
//   n        – 1-based stage number; compared against `pipelineStep` to decide active/done.
//   label    – stage title; coloured with `color` once the pipeline has reached the stage.
//   sublabel – small caption under the title.
//   icon     – glyph for the stage; NOTE(review): not referenced by the stepper markup
//              visible in this file.
//   color    – accent colour for the stage title.
const steps = [
{ n: 1, label: "Loader", sublabel: "Vector distribution analysis", icon: "⬛", color: "#60a5fa" },
{ n: 2, label: "Augmentor", sublabel: "Multimodel synthesis", icon: "⟳", color: "#f472b6" },
{ n: 3, label: "Filter", sublabel: "S-BERT Gate", icon: "⊘", color: "#fbbf24" },
{ n: 4, label: "Trainer", sublabel: "PyTorch Integration", icon: "◉", color: "#4ade80" },
];
return (
<>
<style>{`
${FONTS}
* { box-sizing: border-box; margin: 0; padding: 0; }
body { background: #020817; }
.app { min-height: 100vh; background: #020817; color: #e2e8f0; font-family: 'Syne', sans-serif; }
.noise { position: fixed; inset: 0; pointer-events: none; z-index: 0;
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.04'/%3E%3C/svg%3E");
opacity: 0.6; }
.grid-bg { position: fixed; inset: 0; pointer-events: none; z-index: 0;
background-image: linear-gradient(#0f172a66 1px, transparent 1px), linear-gradient(90deg, #0f172a66 1px, transparent 1px);
background-size: 40px 40px; }
.main { position: relative; z-index: 1; max-width: 1100px; margin: 0 auto; padding: 24px 16px; }
.card { background: #0f1729; border: 1px solid #1e293b; border-radius: 12px; padding: 20px; }
.tab-btn { background: none; border: none; cursor: pointer; font-family: 'Syne', sans-serif;
padding: 8px 18px; border-radius: 8px; font-size: 13px; font-weight: 600; letter-spacing: 0.5px;
transition: all 0.2s; }
.tab-btn.active { background: #1e293b; color: #f472b6; }
.tab-btn:not(.active) { color: #475569; }
.tab-btn:hover:not(.active) { color: #94a3b8; }
.method-btn { background: none; border: 2px solid #1e293b; cursor: pointer; font-family: 'JetBrains Mono', monospace;
padding: 10px 14px; border-radius: 10px; font-size: 12px; font-weight: 500;
transition: all 0.2s; color: #64748b; text-align: left; }
.method-btn.selected { border-color: #f472b6; background: #f472b611; color: #f472b6; }
.method-btn:hover:not(.selected) { border-color: #334155; color: #94a3b8; }
.run-btn { background: linear-gradient(135deg, #f472b6, #c026d3); border: none; cursor: pointer;
font-family: 'Syne', sans-serif; font-weight: 700; font-size: 14px; letter-spacing: 1px;
padding: 12px 32px; border-radius: 10px; color: white; transition: all 0.2s;
text-transform: uppercase; }
.run-btn:hover:not(:disabled) { transform: translateY(-1px); box-shadow: 0 8px 24px #f472b644; }
.run-btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none; }
.reset-btn { background: none; border: 1px solid #334155; cursor: pointer;
font-family: 'JetBrains Mono', monospace; font-size: 12px; padding: 8px 18px; border-radius: 8px;
color: #64748b; transition: all 0.2s; }
.reset-btn:hover { border-color: #475569; color: #94a3b8; }
.sentence-btn { background: none; border: 1px solid #1e293b; cursor: pointer; border-radius: 8px;
padding: 10px 14px; transition: all 0.2s; text-align: left; width: 100%; }
.sentence-btn.selected { border-color: #60a5fa; background: #60a5fa11; }
.sentence-btn:hover:not(.selected) { border-color: #334155; }
.pulse { animation: pulse 1.5s infinite; }
@keyframes pulse { 0%,100% { opacity: 1; } 50% { opacity: 0.4; } }
.fade-in { animation: fadeIn 0.5s ease; }
@keyframes fadeIn { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } }
.connector { flex: 1; height: 2px; background: linear-gradient(90deg, #1e293b, #334155); margin: 0 4px; }
.connector.active { background: linear-gradient(90deg, #f472b6, #c026d3); }
.connector.done { background: #4ade80; }
::-webkit-scrollbar { width: 4px; }
::-webkit-scrollbar-track { background: #0f1729; }
::-webkit-scrollbar-thumb { background: #334155; border-radius: 2px; }
`}</style>
<div className="app">
<div className="noise" />
<div className="grid-bg" />
<div className="main">
{/* Academic Header */}
<div style={{ marginBottom: 32 }}>
<div style={{ display: "flex", alignItems: "flex-start", justifyContent: "space-between", flexWrap: "wrap", gap: 12 }}>
<div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#f472b6", letterSpacing: 3, textTransform: "uppercase", marginBottom: 8 }}>
◈ Cybersecurity · UKEN · Jacek Dusza · 2026
</div>
<h1 style={{ fontFamily: "Syne", fontWeight: 800, fontSize: "clamp(22px,4vw,34px)", lineHeight: 1.1, color: "#f8fafc", letterSpacing: -0.5 }}>
Multimodel Data Augmentation Engine
<span style={{ display: "block", color: "#f472b6" }}>Sentiment Analysis (PL)</span>
</h1>
<p style={{ marginTop: 10, color: "#64748b", fontFamily: "JetBrains Mono", fontSize: 12 }}>
HerBERT · NLPAug · Groq API · Sentence-BERT · deep-translator
</p>
</div>
<div style={{ display: "flex", gap: 8, flexWrap: "wrap" }}>
{[
{ label: "Classes", val: "6" }, { label: "Long-tail", val: "4" },
{ label: "Arch.", val: "Hybrid" }, { label: "Methods", val: "3" }
].map(({ label, val }) => (
<div key={label} className="card" style={{ padding: "10px 16px", textAlign: "center", minWidth: 70 }}>
<div style={{ fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 18, color: "#f8fafc" }}>{val}</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#475569", letterSpacing: 1, textTransform: "uppercase" }}>{label}</div>
</div>
))}
</div>
</div>
{/* Navigation Tabs */}
<div style={{ display: "flex", gap: 4, marginTop: 24, borderBottom: "1px solid #1e293b", paddingBottom: 0 }}>
{[
{ id: "pipeline", label: "▶ Control Panel" },
{ id: "arch", label: "◈ Architecture" },
{ id: "tech", label: "⊞ Tech Stack" },
].map(t => (
<button key={t.id} className={`tab-btn ${activeTab === t.id ? "active" : ""}`}
onClick={() => setActiveTab(t.id)} style={{ borderBottom: activeTab === t.id ? "2px solid #f472b6" : "2px solid transparent", borderRadius: "8px 8px 0 0" }}>
{t.label}
</button>
))}
</div>
</div>
{/* TAB: CONTROL PANEL */}
{activeTab === "pipeline" && (
<div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
<div className="card">
<div style={{ display: "flex", alignItems: "center", gap: 0 }}>
{steps.map((s, i) => (
<Fragment key={s.n}>
<div style={{ display: "flex", flexDirection: "column", alignItems: "center", minWidth: 70 }}>
<StepBadge step={s.n} active={pipelineStep === s.n} done={pipelineStep > s.n} />
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 11, color: pipelineStep >= s.n ? s.color : "#334155", marginTop: 6, textAlign: "center" }}>{s.label}</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 9, color: "#334155", textAlign: "center", maxWidth: 70 }}>{s.sublabel}</div>
</div>
{i < steps.length - 1 && (
<div className={`connector ${pipelineStep > i + 1 ? "done" : pipelineStep === i + 1 ? "active" : ""}`} />
)}
</Fragment>
))}
</div>
</div>
<div style={{ display: "grid", gridTemplateColumns: "1fr 1fr", gap: 16 }}>
<div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
<div className="card">
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}>
1. Input Vector Initialization
</div>
<div style={{ display: "flex", flexDirection: "column", gap: 6 }}>
{SAMPLE_SENTENCES.map(s => (
<button key={s.id} className={`sentence-btn ${selectedSentence.id === s.id ? "selected" : ""}`}
onClick={() => { setSelectedSentence(s); reset(); }}>
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 4 }}>
<ClassBadge label={s.label} />
<span style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: s.count < 15 ? "#f87171" : "#475569" }}>
{s.count} samples {s.count < 15 ? "⚠" : ""}
</span>
</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#94a3b8", lineHeight: 1.5 }}>{s.text}</div>
</button>
))}
</div>
</div>
<div className="card">
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}>
2. Augmentation Algorithm Setup
</div>
<div style={{ display: "flex", flexDirection: "column", gap: 8 }}>
{Object.entries(AUG_METHODS).map(([key, m]) => (
<button key={key} className={`method-btn ${selectedMethod === key ? "selected" : ""}`}
onClick={() => { setSelectedMethod(key); reset(); }}>
<div style={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}>
<span style={{ color: selectedMethod === key ? m.color : undefined }}>{m.label}</span>
<span style={{ fontSize: 10, opacity: 0.7 }}>{m.lib}</span>
</div>
<div style={{ fontSize: 10, color: "#475569", marginTop: 4, lineHeight: 1.4, fontWeight: 400 }}>{m.description}</div>
{/* Back-Translation Extensions */}
{key === "BT" && selectedMethod === "BT" && (
<div style={{ display: "flex", gap: 6, marginTop: 10 }} onClick={(e) => e.stopPropagation()}>
{[
{ code: "en", name: "English" },
{ code: "de", name: "German" },
{ code: "cs", name: "Czech" }
].map(lang => (
<div
key={lang.code}
onClick={() => { setSelectedPivot(lang.code); reset(); }}
style={{
padding: "4px 10px", borderRadius: 6, fontFamily: "JetBrains Mono", fontSize: 10, cursor: "pointer",
border: `1px solid ${selectedPivot === lang.code ? m.color : "#334155"}`,
background: selectedPivot === lang.code ? `${m.color}22` : "transparent",
color: selectedPivot === lang.code ? m.color : "#64748b",
transition: "all 0.2s"
}}
>
{lang.name}
</div>
))}
</div>
)}
{/* EDA Extensions */}
{key === "EDA" && selectedMethod === "EDA" && (
<div style={{ marginTop: 12, padding: "12px", background: "#0f172a", borderRadius: 8, border: `1px solid ${m.color}44` }} onClick={(e) => e.stopPropagation()}>
<div style={{ display: "flex", justifyContent: "space-between", marginBottom: 6 }}>
<span style={{ fontSize: 10, color: m.color, fontFamily: "JetBrains Mono", textTransform: "uppercase", fontWeight: 700 }}>Perturbation Rate (aug_p)</span>
<span style={{ fontSize: 10, color: m.color, fontFamily: "JetBrains Mono", fontWeight: 700 }}>{Math.round(edaIntensity * 100)}%</span>
</div>
<input
type="range" min="0.01" max="0.50" step="0.01"
value={edaIntensity}
onChange={(e) => { setEdaIntensity(parseFloat(e.target.value)); reset(); }}
style={{ width: "100%", accentColor: m.color, cursor: "pointer" }}
/>
<div style={{ fontSize: 9, color: m.color, marginTop: 6, opacity: 0.8, lineHeight: 1.4, fontFamily: "JetBrains Mono" }}>
Scales the intensity of noise introduced to the lexical structure of the sentence.
</div>
</div>
)}
</button>
))}
</div>
{/* Global S-BERT Filter */}
<div style={{ marginTop: 24, padding: "16px", background: "#0f172a", borderRadius: 10, border: "1px dashed #fbbf2455" }}>
<div style={{ display: "flex", justifyContent: "space-between", marginBottom: 6 }}>
<span style={{ fontSize: 11, color: "#fbbf24", fontFamily: "JetBrains Mono", textTransform: "uppercase", fontWeight: 700 }}>Semantic Filter Threshold</span>
<span style={{ fontSize: 11, color: "#fbbf24", fontFamily: "JetBrains Mono", fontWeight: 700 }}>{Math.round(filterThreshold * 100)}%</span>
</div>
<input
type="range" min="0.5" max="0.95" step="0.05"
value={filterThreshold}
onChange={(e) => {setFilterThreshold(parseFloat(e.target.value)); reset();}}
style={{ width: "100%", accentColor: "#fbbf24", cursor: "pointer", marginTop: 4 }}
/>
<div style={{ fontSize: 10, color: "#94a3b8", marginTop: 8, lineHeight: 1.5, fontFamily: "JetBrains Mono" }}>
Minimum required Cosine Similarity (S-BERT) to prevent semantic drift and preserve original sentiment.
</div>
</div>
<div style={{ display: "flex", gap: 8, marginTop: 16 }}>
<button className="run-btn" onClick={runPipeline} disabled={running}>
{running ? <span className="pulse">PROCESSING...</span> : "▶ RUN PIPELINE"}
</button>
<button className="reset-btn" onClick={reset}>RESET PIPELINE</button>
</div>
</div>
</div>
{/* Result Panels */}
<div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
<div className="card" style={{ minHeight: 180 }}>
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}>
Transmutation Output
</div>
<div style={{ marginBottom: 12 }}>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#334155", marginBottom: 4, textTransform: "uppercase", letterSpacing: 1 }}>Base Corpus</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#64748b", lineHeight: 1.6, padding: "8px 12px", background: "#020817", borderRadius: 8, border: "1px solid #1e293b" }}>
{selectedSentence.text}
</div>
</div>
{intermediate && selectedMethod === "BT" && (
<div className="fade-in" style={{ marginBottom: 12 }}>
<div style={{ display: "flex", alignItems: "center", gap: 8, marginBottom: 4 }}>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#60a5fa", textTransform: "uppercase", letterSpacing: 1 }}>
Translation Vector (From: {intermediate.lang})
</div>
<div style={{ flex: 1, height: 1, background: "dashed 1px #1e293b" }} />
</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#94a3b8", fontStyle: "italic", lineHeight: 1.6, padding: "8px 12px", background: "#020817", borderRadius: 8, border: `1px dashed #60a5fa44` }}>
{intermediate.text}
</div>
</div>
)}
{augmented ? (
<div className="fade-in">
<div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: AUG_METHODS[selectedMethod].color, marginBottom: 4, textTransform: "uppercase", letterSpacing: 1 }}>
Resulting Paraphrase ({selectedMethod})
</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#e2e8f0", lineHeight: 1.6, padding: "8px 12px", background: "#020817", borderRadius: 8, border: `1px solid ${AUG_METHODS[selectedMethod].color}44` }}>
{augmented}
</div>
</div>
) : (
<div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#1e293b", fontStyle: "italic", marginTop: 8 }}>
{running && pipelineStep >= 2 ? <span className="pulse">Calculating input matrix...</span> : "Awaiting start signal..."}
</div>
)}
</div>
{similarity !== null && (
<div className={`card fade-in`} style={{ border: `1px solid ${filtered ? "#4ade8044" : "#f8717144"}` }}>
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}>
Quality Inspection (Sentence-BERT)
</div>
<MetricBar label="Cosine Similarity" value={similarity * 100} color={filtered ? "#4ade80" : "#f87171"} />
<MetricBar label="Acceptance Threshold" value={filterThreshold * 100} color="#fbbf24" />
<div style={{ marginTop: 12, display: "flex", alignItems: "center", gap: 10 }}>
<div style={{
padding: "6px 16px", borderRadius: 20, fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 12,
background: filtered ? "#4ade8022" : "#f8717122", color: filtered ? "#4ade80" : "#f87171",
border: `1px solid ${filtered ? "#4ade80" : "#f87171"}44`
}}>
{filtered ? "✓ ACCEPTED (No Drift)" : "✗ REJECTED (Semantic Drift)"}
</div>
<span style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#475569" }}>
Distance: {similarity.toFixed(3)}
</span>
</div>
</div>
)}
{metrics && (
<div className="card fade-in" style={{ border: "1px solid #4ade8022" }}>
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}>
Model Impact (HerBERT Evaluation)
</div>
<MetricBar label="Macro-F1 (Baseline)" value={metrics.baseF1} color="#475569" />
<MetricBar label="Macro-F1 (Augmented)" value={metrics.augF1} color="#4ade80" />
<MetricBar label="Global Accuracy" value={metrics.augAcc} color="#60a5fa" />
<MetricBar label="Mean Semantic Score (SSS)" value={metrics.sss} color="#f472b6" />
<div style={{ marginTop: 12, fontFamily: "JetBrains Mono", fontSize: 11, color: "#4ade80" }}>
Δ Model Optimization: +{(metrics.augF1 - metrics.baseF1).toFixed(2)} pp.
</div>
</div>
)}
<div className="card" style={{ background: "#020817", border: "1px solid #0f1729" }}>
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 11, color: "#334155", letterSpacing: 2, textTransform: "uppercase", marginBottom: 8 }}>
⟩ SYSTEM LOG (FastAPI)
</div>
<div ref={logRef} style={{ height: 160, overflowY: "auto", display: "flex", flexDirection: "column", gap: 2 }}>
{logs.length === 0 ? (
<span style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#1e293b" }}>System ready.</span>
) : logs.map((l, i) => (
<div key={i} style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: l.color, display: "flex", gap: 10 }}>
<span style={{ color: "#334155", flexShrink: 0 }}>{l.ts}</span>
<span>{l.msg}</span>
</div>
))}
{running && <span className="pulse" style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#334155" }}></span>}
</div>
</div>
</div>
</div>
</div>
)}
{/* TAB: ARCHITECTURE */}
{activeTab === "arch" && (
<div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
<div className="card">
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 16, color: "#f8fafc", marginBottom: 4 }}>
Hybrid Pipeline Business Logic
</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#475569", marginBottom: 24 }}>
Initialization → Augmentation → Semantic Verification → Fine-Tuning
</div>
{[
{
n: "01", label: "Distribution Analyzer", color: "#60a5fa",
desc: "Scans the input dataset and flags minority (long-tail) classes requiring data augmentation to prevent classifier generalization errors.",
details: ["pandas / HF Datasets", "Frequency mapping", "Input anomaly isolation"],
code: "dataset = load_dataset('polemo2-official')\nminority_classes = dataset.filter(lambda x: class_count[x['label']] < THRESHOLD)"
},
{
n: "02", label: "Augmentation Engine", color: "#f472b6",
desc: "Multi-path module generating paraphrases depending on the specificity of the analyzed sentence (LLM for complex syntax, EDA for quick noise).",
details: ["NLPAug: lexical operations", "deep-translator: cross-structures", "Groq/Llama 3: contextual inference"],
code: "def augment_pipeline(payload):\n if payload.method == 'EDA': return apply_nlpaug(payload.text)\n if payload.method == 'LLM': return groq_completion(payload.text)"
},
{
n: "03", label: "Semantic Gate (S-BERT)", color: "#fbbf24",
desc: "Defensive module preventing training data poisoning. Rejects paraphrases that have lost their original sentiment or core meaning.",
details: ["paraphrase-multilingual", "Cosine Similarity", "Semantic Drift Prevention"],
code: "embeddings = sbert_model.encode([original, augmented])\nsimilarity = cosine_similarity(embeddings[0], embeddings[1])\nif similarity >= CONFIG.threshold: return ACCEPT"
},
{
n: "04", label: "PyTorch Integration", color: "#4ade80",
desc: "Automated fine-tuning of the base HerBERT classifier on the newly generated, enriched data corpus.",
details: ["allegro/herbert-base-cased", "Tensor management", "Loss Function optimization"],
code: "model = AutoModelForSequenceClassification.from_pretrained('allegro/herbert')\ntrainer = Trainer(model=model, train_dataset=augmented_dataset)\ntrainer.train()"
},
].map((s, i) => (
<div key={s.n} style={{ display: "flex", gap: 16, marginBottom: i < 3 ? 0 : 0 }}>
<div style={{ display: "flex", flexDirection: "column", alignItems: "center" }}>
<div style={{ width: 44, height: 44, borderRadius: "50%", border: `2px solid ${s.color}`, display: "flex", alignItems: "center", justifyContent: "center", fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 13, color: s.color, background: s.color + "11", flexShrink: 0 }}>{s.n}</div>
{i < 3 && <div style={{ width: 2, flex: 1, background: `linear-gradient(${s.color}, ${["#f472b6","#fbbf24","#4ade80","transparent"][i]})`, minHeight: 24, margin: "4px 0" }} />}
</div>
<div style={{ flex: 1, paddingBottom: i < 3 ? 20 : 0 }}>
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 15, color: s.color, marginBottom: 6 }}>{s.label}</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#94a3b8", lineHeight: 1.7, marginBottom: 10 }}>{s.desc}</div>
<div style={{ display: "flex", gap: 8, flexWrap: "wrap", marginBottom: 10 }}>
{s.details.map(d => (
<span key={d} style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: s.color, background: s.color + "11", border: `1px solid ${s.color}33`, padding: "3px 8px", borderRadius: 6 }}>{d}</span>
))}
</div>
<div style={{ background: "#020817", border: `1px solid ${s.color}22`, borderRadius: 8, padding: "10px 14px" }}>
<pre style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#64748b", margin: 0, whiteSpace: "pre-wrap", lineHeight: 1.7 }}>{s.code}</pre>
</div>
</div>
</div>
))}
</div>
</div>
)}
{/* TAB: TECH STACK */}
{activeTab === "tech" && (
<div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fill, minmax(300px, 1fr))", gap: 16 }}>
{[
{
cat: "System Core", color: "#60a5fa",
items: [
{ name: "Python 3.10+", desc: "Logical foundation of the NLP environment" },
{ name: "PyTorch", desc: "Tensor computation management and backpropagation" },
{ name: "HuggingFace Transformers", desc: "Access bridge to leading language architectures" },
]
},
{
cat: "Generative Modules", color: "#f472b6",
items: [
{ name: "NLPAug", desc: "EDA rules implementation (replacement, deletion, noise)" },
{ name: "Groq Cloud (Llama 3)", desc: "Inference based on LPU architecture (Ultra-low latency)" },
{ name: "deep-translator", desc: "Network traffic management for Back-Translation" },
]
},
{
cat: "Classification Architecture", color: "#4ade80",
items: [
{ name: "HerBERT (Allegro)", desc: "Polish reference model with optimized tokenizer" },
{ name: "Sentence-Transformers", desc: "Sentence to 768-dimensional dense vector conversion" },
{ name: "AutoModelForSequenceClassification", desc: "Adapter for sentiment analysis tasks" },
]
},
{
cat: "Compute Infrastructure", color: "#c084fc",
items: [
{ name: "Apple Silicon (MPS)", desc: "PyTorch hardware acceleration on M1 Pro architecture" },
{ name: "FastAPI", desc: "High-performance asynchronous REST server coordinating the pipeline" },
{ name: "React (Vite)", desc: "Frontend module for experiment monitoring and visualization" },
]
},
{
cat: "Metrics Monitoring", color: "#fb923c",
items: [
{ name: "Macro-F1 Score", desc: "Primary metric accounting for minority class difficulties" },
{ name: "Cosine Similarity (SSS)", desc: "Assessing the rigor of semantic vector alignment" },
{ name: "scikit-learn", desc: "Advanced classification reporting and error validation" },
]
},
].map(group => (
<div key={group.cat} className="card">
<div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: group.color, letterSpacing: 1, textTransform: "uppercase", marginBottom: 14, display: "flex", alignItems: "center", gap: 8 }}>
<div style={{ width: 8, height: 8, borderRadius: "50%", background: group.color }} />
{group.cat}
</div>
<div style={{ display: "flex", flexDirection: "column", gap: 8 }}>
{group.items.map(item => (
<div key={item.name} style={{ padding: "8px 12px", background: "#020817", borderRadius: 8, border: `1px solid ${group.color}22` }}>
<div style={{ fontFamily: "JetBrains Mono", fontWeight: 600, fontSize: 12, color: "#e2e8f0", marginBottom: 2 }}>{item.name}</div>
<div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#475569", lineHeight: 1.5 }}>{item.desc}</div>
</div>
))}
</div>
</div>
))}
</div>
)}
{/* Footer */}
<div style={{ marginTop: 32, textAlign: "center", fontFamily: "JetBrains Mono", fontSize: 10, color: "#334155", letterSpacing: 2 }}>
MULTIMODEL DATA AUGMENTATION PIPELINE · JACEK DUSZA · MASTER'S THESIS 2026
</div>
</div>
</div>
</>
);
}