Spaces:
Sleeping
Sleeping
| import { useState, useEffect, useRef, Fragment } from "react"; | |
// Root URL of the backend API; the pipeline POSTs to `/augment` and `/filter` under it.
const API_BASE = "https://jaaccaa-data-augmentation.hf.space";

// Google Fonts families (with their weight axes) used by the application UI.
const FONT_FAMILIES = [
  "JetBrains+Mono:wght@300;400;500;700",
  "Syne:wght@400;600;700;800",
];

// CSS `@import` rule injected at the top of the component's <style> block.
const FONTS = `@import url('https://fonts.googleapis.com/css2?${FONT_FAMILIES
  .map((family) => `family=${family}`)
  .join("&")}&display=swap');`;
// ── Demo Samples ──────────────────────────────────────────────────────────────
// Sentences from the PolEmo2.0 corpus reflecting a long-tail distribution.
// Each row is [text, sentiment label, sample count]; ids are assigned 1..n in
// row order. The UI flags entries whose count is below 15 as under-represented.
const SAMPLE_SENTENCES = [
  ["Produkt jest bardzo dobry i polecam go wszystkim.", "pozytywna", 142],
  ["Obsługa klienta była fatalna i nieprofesjonalna.", "negatywna", 8],
  ["Dostawa przyszła na czas, jestem zadowolony.", "pozytywna", 134],
  ["Jakość wykonania pozostawia wiele do życzenia.", "negatywna", 11],
  ["Nie mam zdania na temat tego produktu.", "neutralna", 6],
  ["Cena jest adekwatna do jakości oferowanego towaru.", "neutralna", 9],
].map(([text, label, count], index) => ({ id: index + 1, text, label, count }));
// ── Augmentation Methods Definitions ──────────────────────────────────────────
// Catalogue of selectable augmentation strategies, keyed by the method code that
// is sent to the backend. Each entry carries its display label, accent colour,
// backing library name and a short description. Entry order is the order in
// which the method buttons are rendered.
const AUG_METHODS = Object.fromEntries([
  [
    "EDA",
    {
      label: "EDA (Lexical Rules)",
      color: "#4ade80",
      lib: "NLPAug + HerBERT",
      description: "Token-level perturbations: synonym replacement, random insertion, and deletion. Low computational overhead, high throughput.",
    },
  ],
  [
    "BT",
    {
      label: "Back-Translation",
      color: "#60a5fa",
      lib: "deep-translator (Google)",
      description: "Round-trip translation (PL → [EN, DE, CS] → PL). Leverages multilingual embeddings to break syntactic patterns and bypass pivot-language bias.",
    },
  ],
  [
    "LLM",
    {
      label: "Generative LLM",
      color: "#f472b6",
      lib: "Groq Cloud (Llama 3)",
      description: "Advanced paraphrasing based on prompt instructions for Large Language Models. Highest semantic quality powered by ultra-fast LPU inference.",
    },
  ],
]);
| // ── Helper Components ───────────────────────────────────────────────────────── | |
| function MetricBar({ label, value, color, unit = "%" }) { | |
| return ( | |
| <div style={{ marginBottom: 10 }}> | |
| <div style={{ display: "flex", justifyContent: "space-between", marginBottom: 4 }}> | |
| <span style={{ fontSize: 11, color: "#94a3b8", fontFamily: "JetBrains Mono" }}>{label}</span> | |
| <span style={{ fontSize: 12, color, fontFamily: "JetBrains Mono", fontWeight: 700 }}> | |
| {typeof value === "number" ? value.toFixed(1) : value}{unit} | |
| </span> | |
| </div> | |
| <div style={{ height: 4, background: "#1e293b", borderRadius: 2 }}> | |
| <div style={{ height: "100%", width: `${Math.min(value, 100)}%`, background: color, borderRadius: 2, transition: "width 1s ease" }} /> | |
| </div> | |
| </div> | |
| ); | |
| } | |
| function ClassBadge({ label }) { | |
| const colors = { pozytywna: "#4ade80", negatywna: "#f87171", neutralna: "#fbbf24" }; | |
| return ( | |
| <span style={{ | |
| fontSize: 10, fontFamily: "JetBrains Mono", fontWeight: 700, | |
| color: colors[label] || "#94a3b8", background: (colors[label] || "#94a3b8") + "22", | |
| border: `1px solid ${(colors[label] || "#94a3b8")}44`, | |
| padding: "2px 8px", borderRadius: 20, letterSpacing: 1, textTransform: "uppercase" | |
| }}>{label}</span> | |
| ); | |
| } | |
| function StepBadge({ step, active, done }) { | |
| return ( | |
| <div style={{ | |
| width: 32, height: 32, borderRadius: "50%", | |
| display: "flex", alignItems: "center", justifyContent: "center", | |
| fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 13, | |
| background: done ? "#4ade8033" : active ? "#f472b633" : "#1e293b", | |
| border: `2px solid ${done ? "#4ade80" : active ? "#f472b6" : "#334155"}`, | |
| color: done ? "#4ade80" : active ? "#f472b6" : "#475569", | |
| transition: "all 0.4s ease", | |
| flexShrink: 0, | |
| }}>{done ? "✓" : step}</div> | |
| ); | |
| } | |
| // ── Main Application ────────────────────────────────────────────────────────── | |
| export default function App() { | |
| const [activeTab, setActiveTab] = useState("pipeline"); | |
| const [pipelineStep, setPipelineStep] = useState(0); | |
| const [selectedSentence, setSelectedSentence] = useState(SAMPLE_SENTENCES[1]); | |
| const [selectedMethod, setSelectedMethod] = useState("LLM"); | |
| const [augmented, setAugmented] = useState(null); | |
| const [similarity, setSimilarity] = useState(null); | |
| const [filtered, setFiltered] = useState(null); | |
| const [logs, setLogs] = useState([]); | |
| const [metrics, setMetrics] = useState(null); | |
| const [running, setRunning] = useState(false); | |
| const [intermediate, setIntermediate] = useState(null); | |
| // Hyperparameters | |
| const [selectedPivot, setSelectedPivot] = useState("en"); | |
| const [edaIntensity, setEdaIntensity] = useState(0.15); | |
| const [filterThreshold, setFilterThreshold] = useState(0.80); | |
| const logRef = useRef(null); | |
| useEffect(() => { | |
| if (logRef.current) logRef.current.scrollTop = logRef.current.scrollHeight; | |
| }, [logs]); | |
/**
 * Append one line to the on-screen system log.
 *
 * @param {string} msg - Text of the log line.
 * @param {string} [type="info"] - Severity key selecting the line colour
 *   (info, success, warn, error, accent). An unknown key yields no colour.
 */
const addLog = (msg, type = "info") => {
  const palette = {
    info: "#94a3b8",
    success: "#4ade80",
    warn: "#fbbf24",
    error: "#f87171",
    accent: "#f472b6",
  };

  setLogs((previous) => {
    // Characters 11..18 of the ISO timestamp are the HH:MM:SS portion (UTC).
    const stamp = new Date().toISOString().substring(11, 19);
    return previous.concat([{ msg, color: palette[type], ts: stamp }]);
  });
};
/**
 * Promise-based delay used to pace the simulated pipeline stages.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * POST a JSON payload to a backend endpoint and return the parsed JSON reply.
 *
 * `fetch` only rejects on network failure, so a non-2xx status is turned into
 * an Error here to keep error bodies out of the success path.
 *
 * @param {string} path - Endpoint path appended to API_BASE (e.g. "/augment").
 * @param {object} payload - Value serialised as the JSON request body.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} On network failure, non-2xx status or an unparseable body.
 */
const postJson = async (path, payload) => {
  const response = await fetch(`${API_BASE}${path}`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });
  if (!response.ok) throw new Error(`Status ${response.status}`);
  return response.json();
};

/**
 * Run the four-stage demo pipeline for the currently selected sentence/method.
 *
 * Stages 1 (loading) and 4 (training) are simulated in the browser with fixed
 * delays and synthetic metrics; stages 2 (augmentation) and 3 (similarity
 * filter) call the backend. Progress is reported through the step indicator,
 * the result state setters and the system log. If either backend call fails
 * or returns an unusable payload, the run stops at that stage and the reason
 * is logged. The `running` flag is always cleared when the function exits.
 *
 * @returns {Promise<void>}
 */
const runPipeline = async () => {
  if (running) return;
  setRunning(true);
  setAugmented(null);
  setSimilarity(null);
  setFiltered(null);
  setMetrics(null);
  setIntermediate(null);
  setLogs([]);

  try {
    // 1: Data Loading (simulated)
    setPipelineStep(1);
    addLog("► Initializing data pipeline...", "accent");
    await sleep(600);
    addLog(` Corpus scanned. Detected ${SAMPLE_SENTENCES.length} defined classes.`, "info");
    const minority = SAMPLE_SENTENCES.filter((s) => s.count < 15);
    addLog(` Imbalance flag: ${minority.length} classes identified as long-tail.`, "warn");
    addLog(` Isolating sample from class: [${selectedSentence.label.toUpperCase()}]`, "success");
    await sleep(500);

    // 2: Paraphrase Generation (API call)
    setPipelineStep(2);
    addLog(`► Executing module: ${AUG_METHODS[selectedMethod].label}`, "accent");
    await sleep(400);
    addLog(` Inference engine: ${AUG_METHODS[selectedMethod].lib}`, "info");
    let aug;
    try {
      const dataAug = await postJson("/augment", {
        text: selectedSentence.text,
        method: selectedMethod,
        pivot_lang: selectedPivot,
        eda_p: edaIntensity,
      });
      // An empty or missing paraphrase cannot be scored; treat it as a failure
      // here instead of forwarding it to the filter stage.
      if (typeof dataAug.augmented !== "string" || dataAug.augmented.trim() === "") {
        throw new Error("Response did not contain augmented text");
      }
      aug = dataAug.augmented;
      if (selectedMethod === "BT" && dataAug.intermediate) {
        // Fall back to the requested pivot if the server does not echo it, so
        // a missing field is not misreported as a connection failure.
        const pivotCode = String(dataAug.pivot_lang || selectedPivot).toUpperCase();
        setIntermediate({ lang: pivotCode, text: dataAug.intermediate });
        addLog(` Pivot vector [${pivotCode}]: Generated successfully.`, "info");
      }
    } catch (error) {
      addLog(` API CHANNEL FAILURE: No connection to base FastAPI server.`, "error");
      addLog(` Reason: ${error.message}`, "error");
      return;
    }
    setAugmented(aug);
    addLog(` Sentence synthesis completed.`, "success");
    await sleep(400);

    // 3: S-BERT Filtration (API call)
    setPipelineStep(3);
    addLog("► Calculating vector distance (Sentence-BERT)...", "accent");
    let sim;
    let pass;
    try {
      const filterData = await postJson("/filter", {
        original: selectedSentence.text,
        augmented: aug,
        threshold: filterThreshold,
      });
      // The result panel calls similarity.toFixed(), so anything other than a
      // real number must not reach state.
      if (typeof filterData.similarity !== "number" || Number.isNaN(filterData.similarity)) {
        throw new Error("Response did not contain a numeric similarity");
      }
      sim = filterData.similarity;
      pass = Boolean(filterData.passed);
    } catch (error) {
      addLog(` FILTER FAILURE: No response from microservice.`, "error");
      addLog(` Reason: ${error.message}`, "error");
      return;
    }
    setSimilarity(sim);
    setFiltered(pass);
    // Rounded so thresholds such as 0.55 do not print as 55.00000000000001.
    const requiredPct = Math.round(filterThreshold * 100);
    if (pass) {
      addLog(` Semantic alignment: ${(sim * 100).toFixed(1)}% (Required: ${requiredPct}%) → ACCEPTED ✓`, "success");
    } else {
      addLog(` Semantic alignment: ${(sim * 100).toFixed(1)}% (Required: ${requiredPct}%) → REJECTED ✗`, "error");
      addLog(" Semantic drift detected. Sample flushed from buffer.", "warn");
    }
    await sleep(500);

    // 4: Training Module (simulated: fixed losses, randomised metric gains)
    setPipelineStep(4);
    addLog("► Initializing Fine-Tuning process for base model...", "accent");
    await sleep(400);
    addLog(" Architecture: allegro/herbert-base-cased", "info");
    addLog(" Optimizer: AdamW, Learning Rate (LR): 2e-5", "info");
    await sleep(900);
    addLog(" Epoch 1/3 — Loss: 0.487", "info");
    await sleep(500);
    addLog(" Epoch 2/3 — Loss: 0.312", "info");
    await sleep(500);
    addLog(" Epoch 3/3 — Loss: 0.241", "info");
    await sleep(400);
    const baseF1 = 61.2;
    const augF1 = pass ? 61.2 + 4 + Math.random() * 5 : 61.2 + 1.5 + Math.random() * 2;
    setMetrics({
      baseF1,
      augF1,
      baseAcc: 74.1,
      augAcc: pass ? 74.1 + 3.5 + Math.random() * 3 : 74.1 + 1 + Math.random() * 2,
      sss: sim * 100,
      samplesAdded: pass ? 1 : 0,
    });
    addLog(` Baseline Evaluation (Macro-F1): ${baseF1.toFixed(1)}%`, "info");
    addLog(` Augmented Evaluation (Macro-F1): ${augF1.toFixed(1)}% (+${(augF1 - baseF1).toFixed(1)}pp) ✓`, "success");
    setPipelineStep(5);
    addLog("■ Stream processing completed.", "accent");
  } finally {
    // Single exit point: covers normal completion, early returns and throws.
    setRunning(false);
  }
};
/**
 * Return the control panel to its idle state: step indicator back to 0, all
 * result panels cleared, the log emptied and the running flag lowered.
 */
const reset = () => {
  setRunning(false);
  setPipelineStep(0);

  // Every result slot uses null to mean "nothing to show yet".
  [setAugmented, setSimilarity, setFiltered, setMetrics, setIntermediate].forEach(
    (clearSlot) => clearSlot(null)
  );

  setLogs([]);
};
| const steps = [ | |
| { n: 1, label: "Loader", sublabel: "Vector distribution analysis", icon: "⬛", color: "#60a5fa" }, | |
| { n: 2, label: "Augmentor", sublabel: "Multimodel synthesis", icon: "⟳", color: "#f472b6" }, | |
| { n: 3, label: "Filter", sublabel: "S-BERT Gate", icon: "⊘", color: "#fbbf24" }, | |
| { n: 4, label: "Trainer", sublabel: "PyTorch Integration", icon: "◉", color: "#4ade80" }, | |
| ]; | |
| return ( | |
| <> | |
| <style>{` | |
| ${FONTS} | |
| * { box-sizing: border-box; margin: 0; padding: 0; } | |
| body { background: #020817; } | |
| .app { min-height: 100vh; background: #020817; color: #e2e8f0; font-family: 'Syne', sans-serif; } | |
| .noise { position: fixed; inset: 0; pointer-events: none; z-index: 0; | |
| background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.04'/%3E%3C/svg%3E"); | |
| opacity: 0.6; } | |
| .grid-bg { position: fixed; inset: 0; pointer-events: none; z-index: 0; | |
| background-image: linear-gradient(#0f172a66 1px, transparent 1px), linear-gradient(90deg, #0f172a66 1px, transparent 1px); | |
| background-size: 40px 40px; } | |
| .main { position: relative; z-index: 1; max-width: 1100px; margin: 0 auto; padding: 24px 16px; } | |
| .card { background: #0f1729; border: 1px solid #1e293b; border-radius: 12px; padding: 20px; } | |
| .tab-btn { background: none; border: none; cursor: pointer; font-family: 'Syne', sans-serif; | |
| padding: 8px 18px; border-radius: 8px; font-size: 13px; font-weight: 600; letter-spacing: 0.5px; | |
| transition: all 0.2s; } | |
| .tab-btn.active { background: #1e293b; color: #f472b6; } | |
| .tab-btn:not(.active) { color: #475569; } | |
| .tab-btn:hover:not(.active) { color: #94a3b8; } | |
| .method-btn { background: none; border: 2px solid #1e293b; cursor: pointer; font-family: 'JetBrains Mono', monospace; | |
| padding: 10px 14px; border-radius: 10px; font-size: 12px; font-weight: 500; | |
| transition: all 0.2s; color: #64748b; text-align: left; } | |
| .method-btn.selected { border-color: #f472b6; background: #f472b611; color: #f472b6; } | |
| .method-btn:hover:not(.selected) { border-color: #334155; color: #94a3b8; } | |
| .run-btn { background: linear-gradient(135deg, #f472b6, #c026d3); border: none; cursor: pointer; | |
| font-family: 'Syne', sans-serif; font-weight: 700; font-size: 14px; letter-spacing: 1px; | |
| padding: 12px 32px; border-radius: 10px; color: white; transition: all 0.2s; | |
| text-transform: uppercase; } | |
| .run-btn:hover:not(:disabled) { transform: translateY(-1px); box-shadow: 0 8px 24px #f472b644; } | |
| .run-btn:disabled { opacity: 0.5; cursor: not-allowed; transform: none; } | |
| .reset-btn { background: none; border: 1px solid #334155; cursor: pointer; | |
| font-family: 'JetBrains Mono', monospace; font-size: 12px; padding: 8px 18px; border-radius: 8px; | |
| color: #64748b; transition: all 0.2s; } | |
| .reset-btn:hover { border-color: #475569; color: #94a3b8; } | |
| .sentence-btn { background: none; border: 1px solid #1e293b; cursor: pointer; border-radius: 8px; | |
| padding: 10px 14px; transition: all 0.2s; text-align: left; width: 100%; } | |
| .sentence-btn.selected { border-color: #60a5fa; background: #60a5fa11; } | |
| .sentence-btn:hover:not(.selected) { border-color: #334155; } | |
| .pulse { animation: pulse 1.5s infinite; } | |
| @keyframes pulse { 0%,100% { opacity: 1; } 50% { opacity: 0.4; } } | |
| .fade-in { animation: fadeIn 0.5s ease; } | |
| @keyframes fadeIn { from { opacity: 0; transform: translateY(8px); } to { opacity: 1; transform: none; } } | |
| .connector { flex: 1; height: 2px; background: linear-gradient(90deg, #1e293b, #334155); margin: 0 4px; } | |
| .connector.active { background: linear-gradient(90deg, #f472b6, #c026d3); } | |
| .connector.done { background: #4ade80; } | |
| ::-webkit-scrollbar { width: 4px; } | |
| ::-webkit-scrollbar-track { background: #0f1729; } | |
| ::-webkit-scrollbar-thumb { background: #334155; border-radius: 2px; } | |
| `}</style> | |
| <div className="app"> | |
| <div className="noise" /> | |
| <div className="grid-bg" /> | |
| <div className="main"> | |
| {/* Academic Header */} | |
| <div style={{ marginBottom: 32 }}> | |
| <div style={{ display: "flex", alignItems: "flex-start", justifyContent: "space-between", flexWrap: "wrap", gap: 12 }}> | |
| <div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#f472b6", letterSpacing: 3, textTransform: "uppercase", marginBottom: 8 }}> | |
| ◈ Cybersecurity · UKEN · Jacek Dusza · 2026 | |
| </div> | |
| <h1 style={{ fontFamily: "Syne", fontWeight: 800, fontSize: "clamp(22px,4vw,34px)", lineHeight: 1.1, color: "#f8fafc", letterSpacing: -0.5 }}> | |
| Multimodel Data Augmentation Engine | |
| <span style={{ display: "block", color: "#f472b6" }}>Sentiment Analysis (PL)</span> | |
| </h1> | |
| <p style={{ marginTop: 10, color: "#64748b", fontFamily: "JetBrains Mono", fontSize: 12 }}> | |
| HerBERT · NLPAug · Groq API · Sentence-BERT · deep-translator | |
| </p> | |
| </div> | |
| <div style={{ display: "flex", gap: 8, flexWrap: "wrap" }}> | |
| {[ | |
| { label: "Classes", val: "6" }, { label: "Long-tail", val: "4" }, | |
| { label: "Arch.", val: "Hybrid" }, { label: "Methods", val: "3" } | |
| ].map(({ label, val }) => ( | |
| <div key={label} className="card" style={{ padding: "10px 16px", textAlign: "center", minWidth: 70 }}> | |
| <div style={{ fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 18, color: "#f8fafc" }}>{val}</div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#475569", letterSpacing: 1, textTransform: "uppercase" }}>{label}</div> | |
| </div> | |
| ))} | |
| </div> | |
| </div> | |
| {/* Navigation Tabs */} | |
| <div style={{ display: "flex", gap: 4, marginTop: 24, borderBottom: "1px solid #1e293b", paddingBottom: 0 }}> | |
| {[ | |
| { id: "pipeline", label: "▶ Control Panel" }, | |
| { id: "arch", label: "◈ Architecture" }, | |
| { id: "tech", label: "⊞ Tech Stack" }, | |
| ].map(t => ( | |
| <button key={t.id} className={`tab-btn ${activeTab === t.id ? "active" : ""}`} | |
| onClick={() => setActiveTab(t.id)} style={{ borderBottom: activeTab === t.id ? "2px solid #f472b6" : "2px solid transparent", borderRadius: "8px 8px 0 0" }}> | |
| {t.label} | |
| </button> | |
| ))} | |
| </div> | |
| </div> | |
| {/* TAB: CONTROL PANEL */} | |
| {activeTab === "pipeline" && ( | |
| <div style={{ display: "flex", flexDirection: "column", gap: 16 }}> | |
| <div className="card"> | |
| <div style={{ display: "flex", alignItems: "center", gap: 0 }}> | |
| {steps.map((s, i) => ( | |
| <Fragment key={s.n}> | |
| <div style={{ display: "flex", flexDirection: "column", alignItems: "center", minWidth: 70 }}> | |
| <StepBadge step={s.n} active={pipelineStep === s.n} done={pipelineStep > s.n} /> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 11, color: pipelineStep >= s.n ? s.color : "#334155", marginTop: 6, textAlign: "center" }}>{s.label}</div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 9, color: "#334155", textAlign: "center", maxWidth: 70 }}>{s.sublabel}</div> | |
| </div> | |
| {i < steps.length - 1 && ( | |
| <div className={`connector ${pipelineStep > i + 1 ? "done" : pipelineStep === i + 1 ? "active" : ""}`} /> | |
| )} | |
| </Fragment> | |
| ))} | |
| </div> | |
| </div> | |
| <div style={{ display: "grid", gridTemplateColumns: "1fr 1fr", gap: 16 }}> | |
| <div style={{ display: "flex", flexDirection: "column", gap: 16 }}> | |
| <div className="card"> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}> | |
| 1. Input Vector Initialization | |
| </div> | |
| <div style={{ display: "flex", flexDirection: "column", gap: 6 }}> | |
| {SAMPLE_SENTENCES.map(s => ( | |
| <button key={s.id} className={`sentence-btn ${selectedSentence.id === s.id ? "selected" : ""}`} | |
| onClick={() => { setSelectedSentence(s); reset(); }}> | |
| <div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 4 }}> | |
| <ClassBadge label={s.label} /> | |
| <span style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: s.count < 15 ? "#f87171" : "#475569" }}> | |
| {s.count} samples {s.count < 15 ? "⚠" : ""} | |
| </span> | |
| </div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#94a3b8", lineHeight: 1.5 }}>{s.text}</div> | |
| </button> | |
| ))} | |
| </div> | |
| </div> | |
| <div className="card"> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}> | |
| 2. Augmentation Algorithm Setup | |
| </div> | |
| <div style={{ display: "flex", flexDirection: "column", gap: 8 }}> | |
| {Object.entries(AUG_METHODS).map(([key, m]) => ( | |
| <button key={key} className={`method-btn ${selectedMethod === key ? "selected" : ""}`} | |
| onClick={() => { setSelectedMethod(key); reset(); }}> | |
| <div style={{ display: "flex", justifyContent: "space-between", alignItems: "center" }}> | |
| <span style={{ color: selectedMethod === key ? m.color : undefined }}>{m.label}</span> | |
| <span style={{ fontSize: 10, opacity: 0.7 }}>{m.lib}</span> | |
| </div> | |
| <div style={{ fontSize: 10, color: "#475569", marginTop: 4, lineHeight: 1.4, fontWeight: 400 }}>{m.description}</div> | |
| {/* Back-Translation Extensions */} | |
| {key === "BT" && selectedMethod === "BT" && ( | |
| <div style={{ display: "flex", gap: 6, marginTop: 10 }} onClick={(e) => e.stopPropagation()}> | |
| {[ | |
| { code: "en", name: "English" }, | |
| { code: "de", name: "German" }, | |
| { code: "cs", name: "Czech" } | |
| ].map(lang => ( | |
| <div | |
| key={lang.code} | |
| onClick={() => { setSelectedPivot(lang.code); reset(); }} | |
| style={{ | |
| padding: "4px 10px", borderRadius: 6, fontFamily: "JetBrains Mono", fontSize: 10, cursor: "pointer", | |
| border: `1px solid ${selectedPivot === lang.code ? m.color : "#334155"}`, | |
| background: selectedPivot === lang.code ? `${m.color}22` : "transparent", | |
| color: selectedPivot === lang.code ? m.color : "#64748b", | |
| transition: "all 0.2s" | |
| }} | |
| > | |
| {lang.name} | |
| </div> | |
| ))} | |
| </div> | |
| )} | |
| {/* EDA Extensions */} | |
| {key === "EDA" && selectedMethod === "EDA" && ( | |
| <div style={{ marginTop: 12, padding: "12px", background: "#0f172a", borderRadius: 8, border: `1px solid ${m.color}44` }} onClick={(e) => e.stopPropagation()}> | |
| <div style={{ display: "flex", justifyContent: "space-between", marginBottom: 6 }}> | |
| <span style={{ fontSize: 10, color: m.color, fontFamily: "JetBrains Mono", textTransform: "uppercase", fontWeight: 700 }}>Perturbation Rate (aug_p)</span> | |
| <span style={{ fontSize: 10, color: m.color, fontFamily: "JetBrains Mono", fontWeight: 700 }}>{Math.round(edaIntensity * 100)}%</span> | |
| </div> | |
| <input | |
| type="range" min="0.01" max="0.50" step="0.01" | |
| value={edaIntensity} | |
| onChange={(e) => { setEdaIntensity(parseFloat(e.target.value)); reset(); }} | |
| style={{ width: "100%", accentColor: m.color, cursor: "pointer" }} | |
| /> | |
| <div style={{ fontSize: 9, color: m.color, marginTop: 6, opacity: 0.8, lineHeight: 1.4, fontFamily: "JetBrains Mono" }}> | |
| Scales the intensity of noise introduced to the lexical structure of the sentence. | |
| </div> | |
| </div> | |
| )} | |
| </button> | |
| ))} | |
| </div> | |
| {/* Global S-BERT Filter */} | |
| <div style={{ marginTop: 24, padding: "16px", background: "#0f172a", borderRadius: 10, border: "1px dashed #fbbf2455" }}> | |
| <div style={{ display: "flex", justifyContent: "space-between", marginBottom: 6 }}> | |
| <span style={{ fontSize: 11, color: "#fbbf24", fontFamily: "JetBrains Mono", textTransform: "uppercase", fontWeight: 700 }}>Semantic Filter Threshold</span> | |
| <span style={{ fontSize: 11, color: "#fbbf24", fontFamily: "JetBrains Mono", fontWeight: 700 }}>{Math.round(filterThreshold * 100)}%</span> | |
| </div> | |
| <input | |
| type="range" min="0.5" max="0.95" step="0.05" | |
| value={filterThreshold} | |
| onChange={(e) => {setFilterThreshold(parseFloat(e.target.value)); reset();}} | |
| style={{ width: "100%", accentColor: "#fbbf24", cursor: "pointer", marginTop: 4 }} | |
| /> | |
| <div style={{ fontSize: 10, color: "#94a3b8", marginTop: 8, lineHeight: 1.5, fontFamily: "JetBrains Mono" }}> | |
| Minimum required Cosine Similarity (S-BERT) to prevent semantic drift and preserve original sentiment. | |
| </div> | |
| </div> | |
| <div style={{ display: "flex", gap: 8, marginTop: 16 }}> | |
| <button className="run-btn" onClick={runPipeline} disabled={running}> | |
| {running ? <span className="pulse">PROCESSING...</span> : "▶ RUN PIPELINE"} | |
| </button> | |
| <button className="reset-btn" onClick={reset}>RESET PIPELINE</button> | |
| </div> | |
| </div> | |
| </div> | |
| {/* Result Panels */} | |
| <div style={{ display: "flex", flexDirection: "column", gap: 16 }}> | |
| <div className="card" style={{ minHeight: 180 }}> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}> | |
| Transmutation Output | |
| </div> | |
| <div style={{ marginBottom: 12 }}> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#334155", marginBottom: 4, textTransform: "uppercase", letterSpacing: 1 }}>Base Corpus</div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#64748b", lineHeight: 1.6, padding: "8px 12px", background: "#020817", borderRadius: 8, border: "1px solid #1e293b" }}> | |
| {selectedSentence.text} | |
| </div> | |
| </div> | |
| {intermediate && selectedMethod === "BT" && ( | |
| <div className="fade-in" style={{ marginBottom: 12 }}> | |
| <div style={{ display: "flex", alignItems: "center", gap: 8, marginBottom: 4 }}> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#60a5fa", textTransform: "uppercase", letterSpacing: 1 }}> | |
| Translation Vector (From: {intermediate.lang}) | |
| </div> | |
| <div style={{ flex: 1, height: 1, background: "dashed 1px #1e293b" }} /> | |
| </div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#94a3b8", fontStyle: "italic", lineHeight: 1.6, padding: "8px 12px", background: "#020817", borderRadius: 8, border: `1px dashed #60a5fa44` }}> | |
| {intermediate.text} | |
| </div> | |
| </div> | |
| )} | |
| {augmented ? ( | |
| <div className="fade-in"> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: AUG_METHODS[selectedMethod].color, marginBottom: 4, textTransform: "uppercase", letterSpacing: 1 }}> | |
| Resulting Paraphrase ({selectedMethod}) | |
| </div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#e2e8f0", lineHeight: 1.6, padding: "8px 12px", background: "#020817", borderRadius: 8, border: `1px solid ${AUG_METHODS[selectedMethod].color}44` }}> | |
| {augmented} | |
| </div> | |
| </div> | |
| ) : ( | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#1e293b", fontStyle: "italic", marginTop: 8 }}> | |
| {running && pipelineStep >= 2 ? <span className="pulse">Calculating input matrix...</span> : "Awaiting start signal..."} | |
| </div> | |
| )} | |
| </div> | |
| {similarity !== null && ( | |
| <div className={`card fade-in`} style={{ border: `1px solid ${filtered ? "#4ade8044" : "#f8717144"}` }}> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}> | |
| Quality Inspection (Sentence-BERT) | |
| </div> | |
| <MetricBar label="Cosine Similarity" value={similarity * 100} color={filtered ? "#4ade80" : "#f87171"} /> | |
| <MetricBar label="Acceptance Threshold" value={filterThreshold * 100} color="#fbbf24" /> | |
| <div style={{ marginTop: 12, display: "flex", alignItems: "center", gap: 10 }}> | |
| <div style={{ | |
| padding: "6px 16px", borderRadius: 20, fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 12, | |
| background: filtered ? "#4ade8022" : "#f8717122", color: filtered ? "#4ade80" : "#f87171", | |
| border: `1px solid ${filtered ? "#4ade80" : "#f87171"}44` | |
| }}> | |
| {filtered ? "✓ ACCEPTED (No Drift)" : "✗ REJECTED (Semantic Drift)"} | |
| </div> | |
| <span style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#475569" }}> | |
| Distance: {similarity.toFixed(3)} | |
| </span> | |
| </div> | |
| </div> | |
| )} | |
| {metrics && ( | |
| <div className="card fade-in" style={{ border: "1px solid #4ade8022" }}> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: "#94a3b8", letterSpacing: 1, textTransform: "uppercase", marginBottom: 12 }}> | |
| Model Impact (HerBERT Evaluation) | |
| </div> | |
| <MetricBar label="Macro-F1 (Baseline)" value={metrics.baseF1} color="#475569" /> | |
| <MetricBar label="Macro-F1 (Augmented)" value={metrics.augF1} color="#4ade80" /> | |
| <MetricBar label="Global Accuracy" value={metrics.augAcc} color="#60a5fa" /> | |
| <MetricBar label="Mean Semantic Score (SSS)" value={metrics.sss} color="#f472b6" /> | |
| <div style={{ marginTop: 12, fontFamily: "JetBrains Mono", fontSize: 11, color: "#4ade80" }}> | |
| Δ Model Optimization: +{(metrics.augF1 - metrics.baseF1).toFixed(2)} pp. | |
| </div> | |
| </div> | |
| )} | |
| <div className="card" style={{ background: "#020817", border: "1px solid #0f1729" }}> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 11, color: "#334155", letterSpacing: 2, textTransform: "uppercase", marginBottom: 8 }}> | |
| ⟩ SYSTEM LOG (FastAPI) | |
| </div> | |
| <div ref={logRef} style={{ height: 160, overflowY: "auto", display: "flex", flexDirection: "column", gap: 2 }}> | |
| {logs.length === 0 ? ( | |
| <span style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#1e293b" }}>System ready.</span> | |
| ) : logs.map((l, i) => ( | |
| <div key={i} style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: l.color, display: "flex", gap: 10 }}> | |
| <span style={{ color: "#334155", flexShrink: 0 }}>{l.ts}</span> | |
| <span>{l.msg}</span> | |
| </div> | |
| ))} | |
| {running && <span className="pulse" style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#334155" }}>█</span>} | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| )} | |
| {/* TAB: ARCHITECTURE */} | |
| {activeTab === "arch" && ( | |
| <div style={{ display: "flex", flexDirection: "column", gap: 16 }}> | |
| <div className="card"> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 16, color: "#f8fafc", marginBottom: 4 }}> | |
| Hybrid Pipeline Business Logic | |
| </div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#475569", marginBottom: 24 }}> | |
| Initialization → Augmentation → Semantic Verification → Fine-Tuning | |
| </div> | |
| {[ | |
| { | |
| n: "01", label: "Distribution Analyzer", color: "#60a5fa", | |
| desc: "Scans the input dataset and flags minority (long-tail) classes requiring data augmentation to prevent classifier generalization errors.", | |
| details: ["pandas / HF Datasets", "Frequency mapping", "Input anomaly isolation"], | |
| code: "dataset = load_dataset('polemo2-official')\nminority_classes = dataset.filter(lambda x: class_count[x['label']] < THRESHOLD)" | |
| }, | |
| { | |
| n: "02", label: "Augmentation Engine", color: "#f472b6", | |
| desc: "Multi-path module generating paraphrases depending on the specificity of the analyzed sentence (LLM for complex syntax, EDA for quick noise).", | |
| details: ["NLPAug: lexical operations", "deep-translator: cross-structures", "Groq/Llama 3: contextual inference"], | |
| code: "def augment_pipeline(payload):\n if payload.method == 'EDA': return apply_nlpaug(payload.text)\n if payload.method == 'LLM': return groq_completion(payload.text)" | |
| }, | |
| { | |
| n: "03", label: "Semantic Gate (S-BERT)", color: "#fbbf24", | |
| desc: "Defensive module preventing training data poisoning. Rejects paraphrases that have lost their original sentiment or core meaning.", | |
| details: ["paraphrase-multilingual", "Cosine Similarity", "Semantic Drift Prevention"], | |
| code: "embeddings = sbert_model.encode([original, augmented])\nsimilarity = cosine_similarity(embeddings[0], embeddings[1])\nif similarity >= CONFIG.threshold: return ACCEPT" | |
| }, | |
| { | |
| n: "04", label: "PyTorch Integration", color: "#4ade80", | |
| desc: "Automated fine-tuning of the base HerBERT classifier on the newly generated, enriched data corpus.", | |
| details: ["allegro/herbert-base-cased", "Tensor management", "Loss Function optimization"], | |
| code: "model = AutoModelForSequenceClassification.from_pretrained('allegro/herbert')\ntrainer = Trainer(model=model, train_dataset=augmented_dataset)\ntrainer.train()" | |
| }, | |
| ].map((s, i) => ( | |
| <div key={s.n} style={{ display: "flex", gap: 16, marginBottom: i < 3 ? 0 : 0 }}> | |
| <div style={{ display: "flex", flexDirection: "column", alignItems: "center" }}> | |
| <div style={{ width: 44, height: 44, borderRadius: "50%", border: `2px solid ${s.color}`, display: "flex", alignItems: "center", justifyContent: "center", fontFamily: "JetBrains Mono", fontWeight: 700, fontSize: 13, color: s.color, background: s.color + "11", flexShrink: 0 }}>{s.n}</div> | |
| {i < 3 && <div style={{ width: 2, flex: 1, background: `linear-gradient(${s.color}, ${["#f472b6","#fbbf24","#4ade80","transparent"][i]})`, minHeight: 24, margin: "4px 0" }} />} | |
| </div> | |
| <div style={{ flex: 1, paddingBottom: i < 3 ? 20 : 0 }}> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 15, color: s.color, marginBottom: 6 }}>{s.label}</div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 12, color: "#94a3b8", lineHeight: 1.7, marginBottom: 10 }}>{s.desc}</div> | |
| <div style={{ display: "flex", gap: 8, flexWrap: "wrap", marginBottom: 10 }}> | |
| {s.details.map(d => ( | |
| <span key={d} style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: s.color, background: s.color + "11", border: `1px solid ${s.color}33`, padding: "3px 8px", borderRadius: 6 }}>{d}</span> | |
| ))} | |
| </div> | |
| <div style={{ background: "#020817", border: `1px solid ${s.color}22`, borderRadius: 8, padding: "10px 14px" }}> | |
| <pre style={{ fontFamily: "JetBrains Mono", fontSize: 11, color: "#64748b", margin: 0, whiteSpace: "pre-wrap", lineHeight: 1.7 }}>{s.code}</pre> | |
| </div> | |
| </div> | |
| </div> | |
| ))} | |
| </div> | |
| </div> | |
| )} | |
| {/* TAB: TECH STACK */} | |
| {activeTab === "tech" && ( | |
| <div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fill, minmax(300px, 1fr))", gap: 16 }}> | |
| {[ | |
| { | |
| cat: "System Core", color: "#60a5fa", | |
| items: [ | |
| { name: "Python 3.10+", desc: "Logical foundation of the NLP environment" }, | |
| { name: "PyTorch", desc: "Tensor computation management and backpropagation" }, | |
| { name: "HuggingFace Transformers", desc: "Access bridge to leading language architectures" }, | |
| ] | |
| }, | |
| { | |
| cat: "Generative Modules", color: "#f472b6", | |
| items: [ | |
| { name: "NLPAug", desc: "EDA rules implementation (replacement, deletion, noise)" }, | |
| { name: "Groq Cloud (Llama 3)", desc: "Inference based on LPU architecture (Ultra-low latency)" }, | |
| { name: "deep-translator", desc: "Network traffic management for Back-Translation" }, | |
| ] | |
| }, | |
| { | |
| cat: "Classification Architecture", color: "#4ade80", | |
| items: [ | |
| { name: "HerBERT (Allegro)", desc: "Polish reference model with optimized tokenizer" }, | |
| { name: "Sentence-Transformers", desc: "Sentence to 768-dimensional dense vector conversion" }, | |
| { name: "AutoModelForSequenceClassification", desc: "Adapter for sentiment analysis tasks" }, | |
| ] | |
| }, | |
| { | |
| cat: "Compute Infrastructure", color: "#c084fc", | |
| items: [ | |
| { name: "Apple Silicon (MPS)", desc: "PyTorch hardware acceleration on M1 Pro architecture" }, | |
| { name: "FastAPI", desc: "High-performance asynchronous REST server coordinating the pipeline" }, | |
| { name: "React (Vite)", desc: "Frontend module for experiment monitoring and visualization" }, | |
| ] | |
| }, | |
| { | |
| cat: "Metrics Monitoring", color: "#fb923c", | |
| items: [ | |
| { name: "Macro-F1 Score", desc: "Primary metric accounting for minority class difficulties" }, | |
| { name: "Cosine Similarity (SSS)", desc: "Assessing the rigor of semantic vector alignment" }, | |
| { name: "scikit-learn", desc: "Advanced classification reporting and error validation" }, | |
| ] | |
| }, | |
| ].map(group => ( | |
| <div key={group.cat} className="card"> | |
| <div style={{ fontFamily: "Syne", fontWeight: 700, fontSize: 13, color: group.color, letterSpacing: 1, textTransform: "uppercase", marginBottom: 14, display: "flex", alignItems: "center", gap: 8 }}> | |
| <div style={{ width: 8, height: 8, borderRadius: "50%", background: group.color }} /> | |
| {group.cat} | |
| </div> | |
| <div style={{ display: "flex", flexDirection: "column", gap: 8 }}> | |
| {group.items.map(item => ( | |
| <div key={item.name} style={{ padding: "8px 12px", background: "#020817", borderRadius: 8, border: `1px solid ${group.color}22` }}> | |
| <div style={{ fontFamily: "JetBrains Mono", fontWeight: 600, fontSize: 12, color: "#e2e8f0", marginBottom: 2 }}>{item.name}</div> | |
| <div style={{ fontFamily: "JetBrains Mono", fontSize: 10, color: "#475569", lineHeight: 1.5 }}>{item.desc}</div> | |
| </div> | |
| ))} | |
| </div> | |
| </div> | |
| ))} | |
| </div> | |
| )} | |
| {/* Footer */} | |
| <div style={{ marginTop: 32, textAlign: "center", fontFamily: "JetBrains Mono", fontSize: 10, color: "#334155", letterSpacing: 2 }}> | |
| MULTIMODEL DATA AUGMENTATION PIPELINE · JACEK DUSZA · MASTER'S THESIS 2026 | |
| </div> | |
| </div> | |
| </div> | |
| </> | |
| ); | |
| } |