Spaces:
Running
Running
;
// Page bootstrap: install the shared layout and chart defaults, then load the
// dataset this page visualises.
SM.injectLayout('nav-cleaning');
SM.setChartDefaults();

const store = SM.loadData();
if (!store) {
  // Nothing to show without data: send the user to the upload page and throw
  // so that none of the rendering code below runs while the redirect happens.
  // NOTE(review): the 'β' in the messages below looks like a mis-decoded dash —
  // confirm the intended character against the original file.
  window.location.replace('upload');
  throw new Error('No data β redirecting to upload');
}

const rows = store.rows;
const meta = store.meta;

// Top bar summary: source file name and number of loaded tweets.
const topbarMeta = document.getElementById('topbarMeta');
topbarMeta.textContent = `${meta.filename} β ${rows.length} tweets`;
// Pipeline step controls.
// `stepEnabled` maps every pipeline step id to a boolean; all steps start on.
const stepEnabled = {};
for (const step of SM.PIPELINE_STEPS) {
  stepEnabled[step.id] = true;
}

// Render one row per step: ordinal, label/description, and an on/off toggle.
// label/desc are inserted as raw HTML — assumes SM.PIPELINE_STEPS is trusted
// static configuration (TODO confirm).
const pipelineStepsMarkup = SM.PIPELINE_STEPS.map((step, index) => {
  const ordinal = index + 1;
  return `
<div class="pipeline-step">
<div class="step-num">${ordinal}</div>
<div class="step-info">
<div class="step-label">${step.label}</div>
<div class="step-desc">${step.desc}</div>
</div>
<div class="step-toggle on" id="toggle_${step.id}" data-step="${step.id}"></div>
</div>
`;
});
document.getElementById('pipelineSteps').innerHTML = pipelineStepsMarkup.join('');

// Clicking a toggle flips that step, syncs the 'on' CSS class with the new
// state, and re-renders the live demo so the effect is visible immediately.
for (const toggle of document.querySelectorAll('.step-toggle')) {
  toggle.addEventListener('click', () => {
    const stepId = toggle.dataset.step;
    const nowEnabled = !stepEnabled[stepId];
    stepEnabled[stepId] = nowEnabled;
    toggle.classList.toggle('on', nowEnabled);
    updateDemo();
  });
}
/**
 * Clean a text using only the pipeline steps that are currently enabled.
 *
 * Steps are applied in SM.PIPELINE_STEPS order; a step whose id is not truthy
 * in `stepEnabled` is skipped and the text passes through unchanged.
 *
 * @param {string} raw - Text to clean; any falsy value is treated as ''.
 * @returns {string} The text after every enabled step has been applied.
 */
function cleanCustom(raw) {
  return SM.PIPELINE_STEPS.reduce(
    (text, step) => (stepEnabled[step.id] ? SM.cleanStep(text, step.id) : text),
    raw || '',
  );
}
/**
 * Re-render the step-by-step cleaning preview for the text in #demoInput.
 *
 * Walks SM.PIPELINE_STEPS in order, applying each step that is enabled in
 * `stepEnabled`, and writes one row per step into #stepPipeline showing the
 * text after that step and how its length changed. A blank input clears the
 * preview instead.
 *
 * The length change is shown with an explicit direction: a step that makes the
 * text longer is reported as `+N char` rather than as a negative "removed"
 * count.
 */
function updateDemo() {
  const raw = document.getElementById('demoInput').value;
  const output = document.getElementById('stepPipeline');
  if (!raw.trim()) {
    output.innerHTML = '';
    return;
  }

  const lines = [];
  let text = raw;
  for (const [index, step] of SM.PIPELINE_STEPS.entries()) {
    const before = text;
    if (stepEnabled[step.id]) text = SM.cleanStep(text, step.id);

    const changed = before !== text;
    const removed = before.length - text.length;

    // 'sama' = unchanged. Otherwise report shrinkage, or growth with a '+'.
    // NOTE(review): the 'β' prefix looks like a mis-decoded minus sign —
    // confirm the intended character against the original file.
    let diffLabel = 'sama';
    if (changed) {
      diffLabel = removed >= 0 ? `β${removed} char` : `+${-removed} char`;
    }

    // Strip a leading "N. " ordinal from the label; the row has its own number.
    const stepName = step.label.replace(/\d+\.\s/, '');
    // '(kosong)' = empty: shown when the step left no text at all.
    const shownText = SM.esc(text) || '<em style="color:var(--tx3)">(kosong)</em>';

    lines.push(`<div class="step-line ${changed ? 'changed' : ''}">
<div class="step-line-num">${index + 1}</div>
<div class="step-line-name">${stepName}</div>
<div class="step-line-text">${shownText}</div>
<div class="step-diff">${diffLabel}</div>
</div>`);
  }
  output.innerHTML = lines.join('');
}
// Wire the demo textarea: every edit re-renders the step-by-step preview.
const demoInput = document.getElementById('demoInput');
demoInput.addEventListener('input', updateDemo);

// Seed the demo with the first loaded tweet, or a canned sample when the
// dataset has no rows, then render once so the preview is not empty on load.
if (rows.length) {
  demoInput.value = rows[0].raw;
} else {
  demoInput.value = '@liputan6dotcom Gak sia-sia mendukung #Prabowo-Gibran! https://t.co/abc123 π Data ekonomi tumbuh 5.17%';
}
updateDemo();
// Dataset statistics shown as "pills" above the charts.
// Word counts per tweet before and after cleaning, averaged through SM.avg
// and formatted to one decimal place.
const allBefore = rows.map(({ wordsBefore }) => wordsBefore);
const allAfter = rows.map(({ wordsAfter }) => wordsAfter);
const avgBefore = SM.avg(allBefore).toFixed(1);
const avgAfter = SM.avg(allAfter).toFixed(1);

// Per-tweet reduction in percent (despite the name, this is one value per
// row, not an average). Tweets with no words before cleaning count as 0%.
const avgReduction = rows.map(({ wordsBefore, wordsAfter }) => {
  if (!(wordsBefore > 0)) return 0;
  return Math.round((1 - wordsAfter / wordsBefore) * 100);
});
const avgRed = SM.avg(avgReduction).toFixed(1);

// Number of tweets that have no words left after cleaning.
const emptyAfter = rows.reduce(
  (count, row) => (row.wordsAfter === 0 ? count + 1 : count),
  0,
);

{
  const statsEl = document.getElementById('cleaningStats');

  // Distinct non-empty tokens across all cleaned texts (split on single spaces).
  const vocabulary = new Set();
  for (const row of rows) {
    for (const token of row.cleaned.split(' ')) {
      if (token) vocabulary.add(token);
    }
  }

  // [label, value] pairs, rendered in this order.
  const pills = [
    ['Rata-rata Kata Sebelum', avgBefore],
    ['Rata-rata Kata Sesudah', avgAfter],
    ['Rata-rata Reduksi', avgRed + '%'],
    ['Teks Kosong Setelah', emptyAfter],
    ['Stopwords Digunakan', SM.STOPWORDS.size],
    ['Total Token Unik', vocabulary.size],
  ];
  statsEl.innerHTML = pills
    .map(([label, value]) => `<span class="stat-pill"><strong>${value}</strong> ${label}</span>`)
    .join('');
}
// Reduction distribution chart: histogram of per-tweet word reduction in ten
// 10%-wide bins (0–10% … 90–100%).
const redBins = Array(10).fill(0);
avgReduction.forEach((pct) => {
  // Skip values that cannot be binned (e.g. NaN from a missing word count).
  if (!Number.isFinite(pct)) return;
  // Clamp into the valid bin range. 100% belongs in the last bin, and a
  // negative reduction (the cleaned text has MORE words than the raw text)
  // belongs in the first bin; without the lower clamp it would index
  // redBins[-1], which silently drops the tweet from the chart.
  const bin = Math.min(Math.max(Math.floor(pct / 10), 0), redBins.length - 1);
  redBins[bin] += 1;
});

SM.mkChart('chartReduction', {
  type: 'bar',
  data: {
    // NOTE(review): the 'β' between the bounds looks like a mis-decoded
    // dash — confirm the intended character against the original file.
    labels: redBins.map((_, i) => `${i * 10}β${(i + 1) * 10}%`),
    datasets: [{
      label: 'Tweet',
      data: redBins,
      // Colour by band: bins 0–2 use SM.C.neg, 3–6 SM.C.neu, 7–9 SM.C.pos.
      backgroundColor: redBins.map((_, i) => (i < 3 ? SM.C.neg : i < 7 ? SM.C.neu : SM.C.pos)),
      borderWidth: 0,
      borderRadius: 3,
    }],
  },
  options: {
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { display: false } },
    scales: {
      x: { grid: { color: SM.gridColor }, ticks: { font: { size: 9 } } },
      y: { grid: { color: SM.gridColor }, beginAtZero: true },
    },
  },
});
// Top words chart: the 15 most frequent tokens across all cleaned texts.
// The counter is a null-prototype object so that tokens which happen to match
// an Object.prototype member (e.g. "constructor") start from 0 instead of
// picking up the inherited value and corrupting the count.
const wordFreq = Object.create(null);
rows.forEach((row) => {
  for (const word of row.cleaned.split(/\s+/)) {
    if (!word) continue;
    wordFreq[word] = (wordFreq[word] || 0) + 1;
  }
});

// [word, count] pairs, highest count first.
const topWords = Object.entries(wordFreq)
  .sort((a, b) => b[1] - a[1])
  .slice(0, 15);

SM.mkChart('chartWords', {
  type: 'bar',
  data: {
    labels: topWords.map(([word]) => word),
    datasets: [{
      label: 'Frekuensi',
      data: topWords.map(([, count]) => count),
      backgroundColor: SM.C.a1d,
      borderColor: SM.C.a1,
      borderWidth: 1,
      borderRadius: 3,
    }],
  },
  options: {
    // Horizontal bars: words on the y axis, frequency on the x axis.
    indexAxis: 'y',
    responsive: true,
    maintainAspectRatio: false,
    plugins: { legend: { display: false } },
    scales: {
      x: { grid: { color: SM.gridColor }, beginAtZero: true },
      y: { grid: { display: false }, ticks: { font: { size: 10 } } },
    },
  },
});
// Before/after table: the first 20 tweets with raw text, cleaned text, word
// counts, and the percentage of words removed.
{
  // Written as an escape so the ellipsis survives any source re-encoding (the
  // literal had been garbled into 'β¦').
  const ELLIPSIS = '\u2026';

  // Escape the first `max` characters of `text`, adding an ellipsis only when
  // something was actually cut off.
  const truncate = (text, max) =>
    SM.esc(text.slice(0, max)) + (text.length > max ? ELLIPSIS : '');

  document.getElementById('cleanTableBody').innerHTML = rows.slice(0, 20).map((r) => {
    const pct = r.wordsBefore ? Math.round((1 - r.wordsAfter / r.wordsBefore) * 100) : 0;
    // Colour the percentage by how much was removed: >70, >40, otherwise.
    const pctCol = pct > 70 ? 'var(--pos)' : pct > 40 ? 'var(--neu)' : 'var(--neg)';
    // '(kosong)' = empty: shown when cleaning removed the whole text.
    const cleanedCell = r.cleaned
      ? truncate(r.cleaned, 70)
      : '<em style="color:var(--tx3)">(kosong)</em>';
    return `<tr>
<td class="td-no">${r.id}</td>
<td class="td-trunc" title="${SM.esc(r.raw)}">${truncate(r.raw, 90)}</td>
<td class="td-trunc" title="${SM.esc(r.cleaned)}">${cleanedCell}</td>
<td style="text-align:center">${r.wordsBefore}</td>
<td style="text-align:center">${r.wordsAfter}</td>
<td style="text-align:center;font-weight:600;color:${pctCol}">${pct}%</td>
</tr>`;
  }).join('');
}