sentimeter / js /cleaning.js
rhmnsae's picture
up
3cc3895
'use strict';

// Page bootstrap: set up the shared layout (called with the 'nav-cleaning' key)
// and the shared chart defaults before any chart is created.
SM.injectLayout('nav-cleaning');
SM.setChartDefaults();

// Everything below reads the loaded dataset. When SM.loadData() yields nothing,
// send the browser to the upload page; the throw stops the rest of this script
// from running against missing data.
const store = SM.loadData();
if (!store) {
  window.location.replace('upload');
  // \u2014 is an em dash, written as an escape so it survives re-encoding of this file.
  throw new Error('No data \u2014 redirecting to upload');
}

const { rows, meta } = store;

// Top bar summary: source filename and the number of loaded rows.
document.getElementById('topbarMeta').textContent = `${meta.filename} \u2014 ${rows.length} tweets`;
// ── Pipeline Steps Controls ──
// Step id -> enabled flag. Every pipeline step starts switched on.
const stepEnabled = Object.fromEntries(
  SM.PIPELINE_STEPS.map(({ id }) => [id, true]),
);

// Render one row per pipeline step: 1-based number, label, description and a toggle
// whose data-step attribute carries the step id.
const pipelineStepsHtml = SM.PIPELINE_STEPS.map((step, index) => `
<div class="pipeline-step">
<div class="step-num">${index+1}</div>
<div class="step-info">
<div class="step-label">${step.label}</div>
<div class="step-desc">${step.desc}</div>
</div>
<div class="step-toggle on" id="toggle_${step.id}" data-step="${step.id}"></div>
</div>
`);
document.getElementById('pipelineSteps').innerHTML = pipelineStepsHtml.join('');

// Clicking a toggle flips that step's flag, mirrors it in the 'on' class and
// re-renders the demo preview.
for (const toggleEl of document.querySelectorAll('.step-toggle')) {
  toggleEl.addEventListener('click', () => {
    const stepId = toggleEl.dataset.step;
    const nowEnabled = !stepEnabled[stepId];
    stepEnabled[stepId] = nowEnabled;
    toggleEl.classList.toggle('on', nowEnabled);
    updateDemo();
  });
}
// ── Custom clean using enabled steps only ──
/**
 * Passes text through SM.cleanStep for every pipeline step that is currently
 * switched on in stepEnabled, following the order of SM.PIPELINE_STEPS.
 *
 * @param {string} raw - Text to clean; a falsy value is treated as ''.
 * @returns {string} The text after all enabled steps have been applied.
 */
function cleanCustom(raw) {
  return SM.PIPELINE_STEPS.reduce(
    (text, { id }) => (stepEnabled[id] ? SM.cleanStep(text, id) : text),
    raw || '',
  );
}
// ── Demo Textarea ──
/**
 * Re-renders the step-by-step preview for the text currently in #demoInput.
 *
 * One row is produced per pipeline step, showing the text after that step
 * (a disabled step passes the text through unchanged) and how the character
 * count moved. Blank or whitespace-only input clears the preview instead.
 */
function updateDemo() {
  const raw = document.getElementById('demoInput').value;
  const pipelineEl = document.getElementById('stepPipeline');
  if (!raw.trim()) {
    pipelineEl.innerHTML = '';
    return;
  }

  let text = raw;
  const lines = SM.PIPELINE_STEPS.map((s, i) => {
    const before = text;
    if (stepEnabled[s.id]) text = SM.cleanStep(text, s.id);
    const changed = before !== text;

    // A step may shorten the text, lengthen it, or change it without altering
    // its length, so the label carries an explicit sign for each case.
    // \u2212 is the minus sign and \u00b1 is plus-minus, written as escapes so
    // they survive re-encoding of this file.
    const removed = before.length - text.length;
    let diffLabel = 'sama';
    if (changed) {
      if (removed > 0) {
        diffLabel = `\u2212${removed} char`;
      } else if (removed < 0) {
        diffLabel = `+${-removed} char`;
      } else {
        diffLabel = '\u00b10 char';
      }
    }

    return `<div class="step-line ${changed?'changed':''}">
<div class="step-line-num">${i+1}</div>
<div class="step-line-name">${s.label.replace(/\d+\.\s/,'')}</div>
<div class="step-line-text">${SM.esc(text)||'<em style="color:var(--tx3)">(kosong)</em>'}</div>
<div class="step-diff">${diffLabel}</div>
</div>`;
  });
  pipelineEl.innerHTML = lines.join('');
}
// Re-render the step preview on every edit of the demo textarea.
const demoInput = document.getElementById('demoInput');
demoInput.addEventListener('input', updateDemo);

// Default example: the first loaded row's raw text, or a canned sample when
// the dataset has no rows. The preview is then rendered once explicitly.
if (rows.length) {
  demoInput.value = rows[0].raw;
} else {
  demoInput.value = '@liputan6dotcom Gak sia-sia mendukung #Prabowo-Gibran! https://t.co/abc123 😍 Data ekonomi tumbuh 5.17%';
}
updateDemo();
// ── Dataset Stats ──
// Word counts per row before and after cleaning, and their averages (1 decimal).
const allBefore = rows.map(r => r.wordsBefore);
const allAfter = rows.map(r => r.wordsAfter);
const avgBefore = SM.avg(allBefore).toFixed(1);
const avgAfter = SM.avg(allAfter).toFixed(1);

// Per-row word reduction as a whole percentage; rows with no words before
// cleaning count as 0%. The reduction histogram further down reads this array too.
const avgReduction = rows.map(r => (
  r.wordsBefore > 0 ? Math.round((1 - r.wordsAfter / r.wordsBefore) * 100) : 0
));
const avgRed = SM.avg(avgReduction).toFixed(1);

// Rows whose cleaned text ended up with no words at all.
const emptyAfter = rows.filter(r => r.wordsAfter === 0).length;

// Distinct tokens across all cleaned texts. Tokens are split on any whitespace
// run, the same way the word-frequency chart tokenizes, so both statistics
// agree even if cleaned text contains tabs or newlines.
const uniqueTokenCount = new Set(
  rows.flatMap(r => r.cleaned.split(/\s+/).filter(Boolean)),
).size;

document.getElementById('cleaningStats').innerHTML = [
  { label: 'Rata-rata Kata Sebelum', value: avgBefore },
  { label: 'Rata-rata Kata Sesudah', value: avgAfter },
  { label: 'Rata-rata Reduksi', value: avgRed + '%' },
  { label: 'Teks Kosong Setelah', value: emptyAfter },
  { label: 'Stopwords Digunakan', value: SM.STOPWORDS.size },
  { label: 'Total Token Unik', value: uniqueTokenCount },
].map(s => `<span class="stat-pill"><strong>${s.value}</strong> ${s.label}</span>`).join('');
// ── Reduction Distribution Chart ──
// Histogram of per-row reduction percentages in ten 10-point bins.
const redBins = Array(10).fill(0);
avgReduction.forEach(v => {
  // NaN cannot be binned; skip it rather than writing a stray "NaN" property.
  if (Number.isNaN(v)) return;
  // Clamp to the valid bin range: 100% belongs in the last bin, and a negative
  // reduction (text that gained words) is counted in the first bin instead of
  // landing on index -1, which would silently drop the row from the chart.
  const bin = Math.min(Math.max(Math.floor(v / 10), 0), 9);
  redBins[bin] += 1;
});

SM.mkChart('chartReduction', {
  type: 'bar',
  data: {
    labels: redBins.map((_, i) => `${i*10}–${(i+1)*10}%`),
    datasets: [{
      label: 'Tweet',
      data: redBins,
      // Colour bands: bins 0-2 use the "neg" colour, 3-6 "neu", 7-9 "pos".
      backgroundColor: redBins.map((_, i) => (i < 3 ? SM.C.neg : i < 7 ? SM.C.neu : SM.C.pos)),
      borderWidth: 0,
      borderRadius: 3,
    }],
  },
  options: {
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { display: false } },
    scales: {
      x: { grid: { color: SM.gridColor }, ticks: { font: { size: 9 } } },
      y: { grid: { color: SM.gridColor }, beginAtZero: true },
    },
  },
});
// ── Top Words Chart ──
// Token -> occurrence count over all cleaned texts. The dictionary has no
// prototype, so tokens that happen to match inherited property names
// (e.g. "constructor") start from 0 like any other word instead of picking up
// the inherited value and corrupting the count.
const wordFreq = Object.create(null);
rows.forEach(r => {
  r.cleaned.split(/\s+/).filter(Boolean).forEach(w => {
    wordFreq[w] = (wordFreq[w] || 0) + 1;
  });
});

// The 15 most frequent tokens, highest count first.
const topWords = Object.entries(wordFreq)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 15);

SM.mkChart('chartWords', {
  type: 'bar',
  data: {
    labels: topWords.map(([k]) => k),
    datasets: [{
      label: 'Frekuensi',
      data: topWords.map(([, v]) => v),
      backgroundColor: SM.C.a1d,
      borderColor: SM.C.a1,
      borderWidth: 1,
      borderRadius: 3,
    }],
  },
  options: {
    // Horizontal bars: the words run along the y axis.
    indexAxis: 'y',
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { display: false } },
    scales: {
      x: { grid: { color: SM.gridColor }, beginAtZero: true },
      y: { grid: { display: false }, ticks: { font: { size: 10 } } },
    },
  },
});
// ── Before/After Table ──
// Preview of the first 20 rows: raw text, cleaned text, word counts and the
// word reduction percentage, colour-coded by how much was removed.
document.getElementById('cleanTableBody').innerHTML = rows.slice(0, 20).map(r => {
  const pct = r.wordsBefore ? Math.round((1 - r.wordsAfter / r.wordsBefore) * 100) : 0;
  const pctCol = pct > 70 ? 'var(--pos)' : pct > 40 ? 'var(--neu)' : 'var(--neg)';

  // Both text cells are cut to a fixed length and get an ellipsis only when
  // something was actually cut off; the full text stays in the title attribute.
  const rawCell = `${SM.esc(r.raw.slice(0, 90))}${r.raw.length > 90 ? '…' : ''}`;
  const cleanedCell = r.cleaned
    ? `${SM.esc(r.cleaned.slice(0, 70))}${r.cleaned.length > 70 ? '…' : ''}`
    : '<em style="color:var(--tx3)">(kosong)</em>';

  return `<tr>
<td class="td-no">${r.id}</td>
<td class="td-trunc" title="${SM.esc(r.raw)}">${rawCell}</td>
<td class="td-trunc" title="${SM.esc(r.cleaned)}">${cleanedCell}</td>
<td style="text-align:center">${r.wordsBefore}</td>
<td style="text-align:center">${r.wordsAfter}</td>
<td style="text-align:center;font-weight:600;color:${pctCol}">${pct}%</td>
</tr>`;
}).join('');