Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
| <title>ScrubData // console</title> | |
| <style> | |
| /* ============ DESIGN TOKENS ============ */ | |
| :root { | |
| --bg: #0a0d12; | |
| --bg-1: #0f141b; | |
| --bg-2: #141b24; | |
| --bg-3: #1a232e; | |
| --line: #233040; | |
| --line-soft: #1a2330; | |
| --fg: #d7e0ea; | |
| --fg-dim: #8593a3; | |
| --fg-faint: #5a6878; | |
| --accent: #36e0b0; | |
| --accent-d: #0f3b30; | |
| --warn: #f0a330; | |
| --del: #ff5c6c; | |
| --del-bg: #2a1318; | |
| --add: #36e0b0; | |
| --add-bg: #0f2a22; | |
| --chg: #f0c330; | |
| --chg-bg: #2a2410; | |
| --mono: "SFMono-Regular", "JetBrains Mono", "Fira Code", Menlo, Consolas, monospace; | |
| --sans: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; | |
| } | |
| * { box-sizing: border-box; } | |
| html, body { margin:0; padding:0; height:100%; } | |
| body { | |
| background: var(--bg); | |
| color: var(--fg); | |
| font-family: var(--sans); | |
| font-size: 13px; | |
| line-height: 1.45; | |
| -webkit-font-smoothing: antialiased; | |
| background-image: | |
| radial-gradient(900px 500px at 85% -10%, rgba(54,224,176,.06), transparent 60%), | |
| linear-gradient(transparent 0, transparent 31px, var(--line-soft) 31px, var(--line-soft) 32px); | |
| background-size: auto, 100% 32px; | |
| } | |
| ::selection { background: var(--accent); color:#04130d; } | |
| /* ============ SCROLLBARS ============ */ | |
| ::-webkit-scrollbar { width:10px; height:10px; } | |
| ::-webkit-scrollbar-track { background: var(--bg-1); } | |
| ::-webkit-scrollbar-thumb { background: var(--line); border:2px solid var(--bg-1); border-radius:6px; } | |
| ::-webkit-scrollbar-thumb:hover { background:#2e3e52; } | |
| /* ============ TOP BAR ============ */ | |
| header { | |
| position: sticky; top:0; z-index:50; | |
| display:flex; align-items:center; gap:16px; | |
| padding:0 18px; height:48px; | |
| background: rgba(10,13,18,.86); | |
| backdrop-filter: blur(8px); | |
| border-bottom:1px solid var(--line); | |
| } | |
| .brand { display:flex; align-items:center; gap:9px; font-family:var(--mono); font-weight:600; letter-spacing:.5px; } | |
| .logo { | |
| width:22px; height:22px; border-radius:5px; | |
| background: linear-gradient(140deg, var(--accent), #1f8f72); | |
| display:grid; place-items:center; color:#04130d; font-weight:800; font-size:13px; | |
| box-shadow: 0 0 0 1px rgba(54,224,176,.4), 0 0 18px rgba(54,224,176,.25); | |
| } | |
| .brand small { color:var(--fg-faint); font-weight:400; letter-spacing:2px; font-size:10px; } | |
| .topspace { flex:1; } | |
| .kbd-hint { font-family:var(--mono); font-size:11px; color:var(--fg-faint); } | |
| .kbd-hint kbd { | |
| font-family:var(--mono); background:var(--bg-3); border:1px solid var(--line); | |
| border-bottom-width:2px; border-radius:4px; padding:1px 6px; color:var(--fg-dim); font-size:10px; | |
| } | |
| .status-led { | |
| display:flex; align-items:center; gap:7px; font-family:var(--mono); | |
| font-size:11px; color:var(--fg-dim); padding:4px 10px; | |
| border:1px solid var(--line); border-radius:6px; background:var(--bg-1); | |
| } | |
| .led { width:7px; height:7px; border-radius:50%; background:var(--fg-faint); } | |
| .status-led.idle .led { background:var(--fg-faint); } | |
| .status-led.busy .led { background:var(--warn); box-shadow:0 0 8px var(--warn); animation:pulse 1s infinite; } | |
| .status-led.ok .led { background:var(--accent); box-shadow:0 0 8px var(--accent); } | |
| .status-led.err .led { background:var(--del); box-shadow:0 0 8px var(--del); } | |
| @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:.35} } | |
| /* ============ LAYOUT ============ */ | |
| main { max-width: 1500px; margin:0 auto; padding:22px 18px 96px; } | |
| /* ============ COMMAND BAR ============ */ | |
| .cmd { | |
| display:flex; align-items:stretch; gap:0; | |
| border:1px solid var(--line); border-radius:10px; overflow:hidden; | |
| background: linear-gradient(180deg, var(--bg-2), var(--bg-1)); | |
| box-shadow: 0 8px 30px rgba(0,0,0,.4); | |
| } | |
| .drop { | |
| flex:1; display:flex; align-items:center; gap:14px; | |
| padding:16px 18px; cursor:pointer; position:relative; | |
| transition: background .12s; | |
| } | |
| .drop:hover { background: rgba(54,224,176,.04); } | |
| .drop:focus-within { background: rgba(54,224,176,.06); } | |
| .drop:focus-within::after { | |
| content:""; position:absolute; inset:4px; border:1.5px solid var(--accent); border-radius:8px; pointer-events:none; | |
| } | |
| .drop.drag { background: rgba(54,224,176,.09); } | |
| .drop.drag::after { | |
| content:""; position:absolute; inset:6px; border:1.5px dashed var(--accent); border-radius:7px; | |
| } | |
| .drop-icon { | |
| width:42px; height:42px; flex:none; border-radius:9px; | |
| border:1px solid var(--line); background:var(--bg); | |
| display:grid; place-items:center; color:var(--accent); | |
| } | |
| .drop-icon svg { width:20px; height:20px; } | |
| .drop-txt b { display:block; font-size:13.5px; } | |
| .drop-txt span { color:var(--fg-dim); font-size:12px; font-family:var(--mono); } | |
| .file-tag { | |
| color:var(--accent); font-family:var(--mono); font-weight:600; | |
| } | |
| .cmd-actions { display:flex; align-items:center; gap:0; border-left:1px solid var(--line); } | |
| .btn { | |
| font-family:var(--mono); font-size:12.5px; font-weight:600; letter-spacing:.3px; | |
| border:none; cursor:pointer; padding:0 22px; height:100%; | |
| background:transparent; color:var(--fg-dim); transition:all .12s; | |
| display:flex; align-items:center; gap:8px; | |
| } | |
| .btn:hover { color:var(--fg); background:var(--bg-3); } | |
| .btn-primary { | |
| background: linear-gradient(180deg, #3df0bd, #25c79b); | |
| color:#04130d; padding:0 26px; | |
| box-shadow: inset 0 1px 0 rgba(255,255,255,.25); | |
| } | |
| .btn-primary:hover { background:linear-gradient(180deg, #4dffcb, #2cd6a8); color:#04130d; } | |
| .btn-primary:disabled { opacity:.4; cursor:not-allowed; filter:grayscale(.5); } | |
| .btn .spin { | |
| width:13px; height:13px; border:2px solid rgba(4,19,13,.3); border-top-color:#04130d; | |
| border-radius:50%; animation:rot .6s linear infinite; | |
| } | |
| @keyframes rot { to { transform:rotate(360deg) } } | |
| .subbar { | |
| display:flex; align-items:center; gap:14px; margin-top:10px; flex-wrap:wrap; | |
| font-family:var(--mono); font-size:11.5px; color:var(--fg-faint); | |
| } | |
| .subbar a { | |
| color:var(--accent); text-decoration:none; border-bottom:1px dashed rgba(54,224,176,.4); | |
| cursor:pointer; background:none; border-top:none; border-left:none; border-right:none; | |
| font-family:var(--mono); font-size:11.5px; padding:2px 0; | |
| } | |
| .subbar a:hover, .subbar a:focus-visible { border-bottom-style:solid; outline:none; } | |
| .subbar .sep { color:var(--line); } | |
| /* ============ ERROR ============ */ | |
| .err-box { | |
| display:none; margin-top:14px; padding:12px 16px; | |
| border:1px solid #5a2230; border-left:3px solid var(--del); border-radius:8px; | |
| background: var(--del-bg); color:#ffb6bf; font-family:var(--mono); font-size:12px; | |
| } | |
| .err-box.show { display:block; } | |
| .err-box b { color:var(--del); } | |
| /* ============ RESULT GRID ============ */ | |
| .results { display:none; margin-top:24px; } | |
| .results.show { display:block; animation:fade .25s ease; } | |
| @keyframes fade { from{opacity:0; transform:translateY(6px)} to{opacity:1; transform:none} } | |
| /* metric strip */ | |
| .metrics { display:grid; grid-template-columns: repeat(auto-fit, minmax(150px,1fr)); gap:10px; margin-bottom:18px; } | |
| .metric { | |
| border:1px solid var(--line); border-radius:9px; padding:12px 14px; | |
| background: linear-gradient(180deg, var(--bg-2), var(--bg-1)); | |
| } | |
| .metric .k { font-family:var(--mono); font-size:10.5px; letter-spacing:1px; text-transform:uppercase; color:var(--fg-faint); } | |
| .metric .v { font-family:var(--mono); font-size:22px; font-weight:700; margin-top:4px; color:var(--fg); } | |
| .metric .v.accent { color:var(--accent); } | |
| .metric .v.chg { color:var(--chg); } | |
| .metric .v.del { color:var(--del); } | |
| .metric .sub { font-size:11px; color:var(--fg-dim); font-family:var(--mono); margin-top:2px; } | |
| .summary-line { | |
| margin-bottom:18px; padding:12px 16px; border-radius:9px; | |
| background:var(--accent-d); border:1px solid #1c5a48; color:#a8f0d8; | |
| font-size:13px; display:flex; gap:10px; align-items:flex-start; | |
| } | |
| .summary-line svg { width:16px; height:16px; flex:none; margin-top:1px; color:var(--accent); } | |
| /* panel + tabs */ | |
| .panel { border:1px solid var(--line); border-radius:11px; background:var(--bg-1); overflow:hidden; margin-bottom:18px; } | |
| .panel-head { | |
| display:flex; align-items:center; gap:2px; padding:0 6px; min-height:42px; flex-wrap:wrap; | |
| border-bottom:1px solid var(--line); background:var(--bg-2); | |
| } | |
| .panel-title { font-family:var(--mono); font-size:11px; letter-spacing:1.5px; text-transform:uppercase; color:var(--fg-faint); padding:0 12px; } | |
| .tabs { display:flex; gap:2px; margin-left:auto; } | |
| .tab { | |
| font-family:var(--mono); font-size:11.5px; font-weight:600; letter-spacing:.3px; | |
| background:transparent; border:none; color:var(--fg-dim); cursor:pointer; | |
| padding:7px 14px; border-radius:7px; transition:all .12s; | |
| } | |
| .tab:hover { color:var(--fg); background:var(--bg-3); } | |
| .tab.active { color:#04130d; background:var(--accent); } | |
| .tab:focus-visible { outline:2px solid var(--accent); outline-offset:1px; } | |
| .legend { display:flex; gap:14px; align-items:center; padding:0 14px; font-family:var(--mono); font-size:10.5px; color:var(--fg-faint); } | |
| .legend .swatch { display:inline-flex; align-items:center; gap:5px; } | |
| .legend i { width:9px; height:9px; border-radius:2px; display:inline-block; } | |
| .legend .add i { background:var(--add); } | |
| .legend .chg i { background:var(--chg); } | |
| .legend .del i { background:var(--del); } | |
| .cap-note { | |
| padding:7px 14px; font-family:var(--mono); font-size:11px; color:var(--fg-faint); | |
| border-bottom:1px solid var(--line-soft); background:var(--bg-1); | |
| } | |
| .cap-note b { color:var(--fg-dim); } | |
| .table-wrap { overflow:auto; max-height: 520px; } | |
| table.grid { border-collapse:separate; border-spacing:0; width:100%; font-family:var(--mono); font-size:12px; } | |
| table.grid th, table.grid td { | |
| padding:6px 11px; border-bottom:1px solid var(--line-soft); border-right:1px solid var(--line-soft); | |
| white-space:nowrap; max-width:340px; overflow:hidden; text-overflow:ellipsis; | |
| } | |
| table.grid thead th { | |
| position:sticky; top:0; z-index:5; background:var(--bg-3); color:var(--fg-dim); | |
| text-align:left; font-weight:600; font-size:11px; letter-spacing:.4px; | |
| border-bottom:1px solid var(--line); | |
| } | |
| table.grid thead th:first-child, table.grid tbody td.rownum { | |
| position:sticky; left:0; z-index:6; background:var(--bg-2); | |
| color:var(--fg-faint); text-align:right; user-select:none; font-size:11px; | |
| } | |
| table.grid thead th:first-child { z-index:7; } | |
| table.grid tbody tr:hover td { background:rgba(54,224,176,.045); } | |
| table.grid tbody tr:hover td.rownum { background:var(--bg-3); } | |
| td.cell-empty { color:var(--fg-faint); font-style:italic; } | |
| /* diff cell states */ | |
| td.d-add { background:var(--add-bg); color:#7ff0cf; box-shadow: inset 2px 0 0 var(--add); } | |
| td.d-chg { background:var(--chg-bg); color:#f0d88c; box-shadow: inset 2px 0 0 var(--chg); } | |
| td.d-del { background:var(--del-bg); color:#ff9aa4; box-shadow: inset 2px 0 0 var(--del); } | |
| td.d-chg .old { color:var(--del); text-decoration:line-through; opacity:.6; margin-right:6px; } | |
| td.d-chg .arrow { color:var(--fg-faint); margin:0 4px; } | |
| tr.row-removed td { opacity:.5; background:var(--del-bg); } | |
| tr.row-removed td.rownum { color:var(--del); opacity:1; } | |
| tr.row-added td { background:var(--add-bg); } | |
| tr.row-added td.rownum { color:var(--add); } | |
| .empty-note { padding:40px; text-align:center; color:var(--fg-faint); font-family:var(--mono); font-size:12px; } | |
| /* report */ | |
| .report-body { padding:18px 22px; font-size:13px; } | |
| .report-body h1, .report-body h2, .report-body h3 { font-family:var(--mono); letter-spacing:.3px; } | |
| .report-body h1 { font-size:17px; border-bottom:1px solid var(--line); padding-bottom:8px; color:var(--fg); } | |
| .report-body h2 { font-size:14px; color:var(--accent); margin-top:20px; } | |
| .report-body h3 { font-size:12.5px; color:var(--fg-dim); margin-top:14px; } | |
| .report-body p { color:var(--fg-dim); } | |
| .report-body em { color:var(--fg-faint); font-style:italic; } | |
| .report-body code { font-family:var(--mono); background:var(--bg-3); padding:1px 6px; border-radius:4px; color:var(--accent); font-size:12px; } | |
| .report-body ul { padding-left:20px; } | |
| .report-body li { margin:3px 0; color:var(--fg-dim); } | |
| .report-body li::marker { color:var(--accent); } | |
| .report-body strong { color:var(--fg); } | |
| .report-body table { border-collapse:collapse; font-family:var(--mono); font-size:12px; margin:10px 0; } | |
| .report-body table td, .report-body table th { border:1px solid var(--line); padding:5px 10px; } | |
| .report-body table th { background:var(--bg-3); color:var(--fg-dim); } | |
| .report-body hr { border:none; border-top:1px solid var(--line); margin:18px 0; } | |
| /* download dock */ | |
| .dock { | |
| position:fixed; bottom:0; left:0; right:0; z-index:40; | |
| display:none; align-items:center; gap:14px; padding:11px 22px; | |
| background:rgba(15,20,27,.94); backdrop-filter:blur(8px); border-top:1px solid var(--line); | |
| } | |
| .dock.show { display:flex; } | |
| .dock .info { font-family:var(--mono); font-size:11.5px; color:var(--fg-dim); } | |
| .dock .info b { color:var(--accent); } | |
| .dock-space { flex:1; } | |
| .btn-download { | |
| font-family:var(--mono); font-size:12.5px; font-weight:700; letter-spacing:.3px; | |
| border:1px solid #1c5a48; border-radius:8px; cursor:pointer; | |
| background:linear-gradient(180deg,#3df0bd,#25c79b); color:#04130d; | |
| padding:10px 20px; display:flex; align-items:center; gap:9px; | |
| box-shadow: 0 0 24px rgba(54,224,176,.3); | |
| } | |
| .btn-download:hover { filter:brightness(1.08); } | |
| .btn-download:focus-visible { outline:2px solid #fff; outline-offset:2px; } | |
| .btn-download svg { width:15px; height:15px; } | |
| .ghost { | |
| position:absolute; width:1px; height:1px; padding:0; margin:-1px; | |
| overflow:hidden; clip:rect(0 0 0 0); white-space:nowrap; border:0; | |
| } | |
| .visually-hidden { | |
| position:absolute; width:1px; height:1px; overflow:hidden; | |
| clip:rect(0 0 0 0); white-space:nowrap; border:0; margin:-1px; padding:0; | |
| } | |
| /* ============ RESPONSIVE ============ */ | |
| @media (max-width: 720px) { | |
| main { padding:16px 12px 120px; } | |
| .cmd { flex-direction:column; } | |
| .cmd-actions { border-left:none; border-top:1px solid var(--line); } | |
| .btn-primary { width:100%; justify-content:center; padding:14px 0; } | |
| .kbd-hint { display:none; } | |
| header { gap:10px; padding:0 12px; } | |
| .metrics { grid-template-columns: repeat(2, 1fr); } | |
| .panel-head { padding:6px; } | |
| .tabs { margin-left:0; width:100%; justify-content:flex-end; } | |
| .table-wrap { max-height: 60vh; } | |
| .dock { flex-direction:column; align-items:stretch; gap:8px; padding:10px 12px; } | |
| .dock-space { display:none; } | |
| .btn-download { width:100%; justify-content:center; } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <header> | |
| <div class="brand"> | |
| <div class="logo">S</div> | |
| ScrubData <small>// CONSOLE</small> | |
| </div> | |
| <div class="topspace"></div> | |
| <div class="kbd-hint">press <kbd>⌘</kbd> <kbd>↵</kbd> to clean · <kbd>O</kbd> to open file</div> | |
| <div class="status-led idle" id="statusLed" role="status" aria-live="polite"> | |
| <span class="led" aria-hidden="true"></span><span id="statusTxt">idle</span> | |
| </div> | |
| </header> | |
| <main> | |
| <!-- COMMAND BAR --> | |
| <div class="cmd"> | |
| <label class="drop" id="dropZone" for="fileInput"> | |
| <input type="file" id="fileInput" class="ghost" accept=".csv,.xlsx,.xls" | |
| aria-label="Choose a CSV or Excel file to clean" /> | |
| <div class="drop-icon" aria-hidden="true"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8"> | |
| <path d="M12 16V4M12 4l-4 4M12 4l4 4" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M4 16v2a2 2 0 002 2h12a2 2 0 002-2v-2" stroke-linecap="round"/> | |
| </svg> | |
| </div> | |
| <div class="drop-txt"> | |
| <b id="dropTitle">Drop a CSV / Excel file</b> | |
| <span id="dropSub">or click to browse · .csv .xlsx .xls</span> | |
| </div> | |
| </label> | |
| <div class="cmd-actions"> | |
| <button class="btn btn-primary" id="runBtn" disabled> | |
| <span id="runLabel">▶ CLEAN IT</span> | |
| </button> | |
| </div> | |
| </div> | |
| <div class="subbar"> | |
| <span>No file? Try the deliberately messy sample:</span> | |
| <a id="sampleBtn" role="button" tabindex="0">load samples/dirty_contacts.csv →</a> | |
| <span class="sep">|</span> | |
| <span>raw strings in · trustworthy table out · nothing silent</span> | |
| </div> | |
| <div class="err-box" id="errBox" role="alert" aria-live="assertive"><b>ERR</b> <span id="errMsg"></span></div> | |
| <!-- RESULTS --> | |
| <div class="results" id="results"> | |
| <div class="metrics" id="metrics"></div> | |
| <div class="summary-line" id="summaryLine"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" aria-hidden="true"> | |
| <path d="M9 12l2 2 4-4" stroke-linecap="round" stroke-linejoin="round"/> | |
| <circle cx="12" cy="12" r="9"/> | |
| </svg> | |
| <span id="summaryTxt"></span> | |
| </div> | |
| <!-- DATA PANEL --> | |
| <div class="panel"> | |
| <div class="panel-head"> | |
| <span class="panel-title">data</span> | |
| <div class="legend" id="legend" style="display:none"> | |
| <span class="swatch chg"><i></i>changed</span> | |
| <span class="swatch add"><i></i>added</span> | |
| <span class="swatch del"><i></i>removed</span> | |
| </div> | |
| <div class="tabs" role="tablist" aria-label="Data view"> | |
| <button class="tab active" data-view="diff" role="tab" aria-selected="true">DIFF</button> | |
| <button class="tab" data-view="before" role="tab" aria-selected="false">BEFORE</button> | |
| <button class="tab" data-view="after" role="tab" aria-selected="false">AFTER</button> | |
| </div> | |
| </div> | |
| <div class="cap-note" id="capNote" style="display:none"></div> | |
| <div class="table-wrap" id="tableWrap"></div> | |
| </div> | |
| <!-- REPORT PANEL --> | |
| <div class="panel"> | |
| <div class="panel-head"> | |
| <span class="panel-title">change report</span> | |
| </div> | |
| <div class="report-body" id="reportBody"></div> | |
| </div> | |
| </div> | |
| </main> | |
| <!-- DOWNLOAD DOCK --> | |
| <div class="dock" id="dock"> | |
| <div class="info">output ready · <b id="dockName">scrubbed.csv</b> · <span id="dockSize"></span></div> | |
| <div class="dock-space"></div> | |
| <button class="btn-download" id="downloadBtn"> | |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" aria-hidden="true"> | |
| <path d="M12 4v11M12 15l-4-4M12 15l4-4" stroke-linecap="round" stroke-linejoin="round"/> | |
| <path d="M5 19h14" stroke-linecap="round"/> | |
| </svg> | |
| DOWNLOAD scrubbed.csv | |
| </button> | |
| </div> | |
| <script type="module"> | |
| import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client"; | |
| /* ---------- element refs ---------- */ | |
/* Shorthand for document.getElementById. */
const $ = (id) => document.getElementById(id);

/* Every element the script touches, looked up once at module load (same order as the id list). */
const [
  fileInput,
  dropZone,
  runBtn,
  runLabel,
  sampleBtn,
  errBox,
  errMsg,
  results,
  tableWrap,
  reportBody,
  dock,
  dockSize,
  downloadBtn,
  statusLed,
  statusTxt,
  legend,
  capNote,
] = [
  "fileInput",
  "dropZone",
  "runBtn",
  "runLabel",
  "sampleBtn",
  "errBox",
  "errMsg",
  "results",
  "tableWrap",
  "reportBody",
  "dock",
  "dockSize",
  "downloadBtn",
  "statusLed",
  "statusTxt",
  "legend",
  "capNote",
].map((id) => $(id));
| /* ---------- client-side render cap (defensive: backend caps at preview_cap) */ | |
// Upper bound on the number of rows rendered into any table on this page.
const RENDER_CAP = 200;

/* ---------- state ---------- */
// File picked via the input, a drop, or the sample link; uploaded as a real file.
let selectedFile = null;
// Most recent response dict received from the backend.
let lastResult = null;
// Which table view is active: "diff", "before" or "after".
let currentView = "diff";

// Location of the bundled sample, served by the server's /samples StaticFiles mount.
const SAMPLE_PATH = "/samples/dirty_contacts.csv";
| /* ---------- status helper ---------- */ | |
/**
 * Update the status indicator in the header.
 * @param {string} kind - Modifier class placed after "status-led" (the stylesheet styles idle, busy, ok, err).
 * @param {string} txt - Label shown next to the LED.
 */
function setStatus(kind, txt) {
  statusTxt.textContent = txt;
  statusLed.className = `status-led ${kind}`;
}
/**
 * Reveal the error box with the given message and switch the status LED to the error state.
 * @param {string} msg - Message written into the box as plain text.
 */
function showError(msg) {
  errMsg.textContent = msg;
  const { classList } = errBox;
  classList.add("show");
  setStatus("err", "error");
}
/** Hide the error box and empty its message. */
function clearError() {
  const { classList } = errBox;
  classList.remove("show");
  errMsg.textContent = "";
}
| /* ---------- file selection ---------- */ | |
/**
 * Make `file` the current selection and reflect it in the drop zone.
 * Enables the run button, clears any previous error and sets the status to "ready".
 * @param {File} file - File chosen via the input or dropped on the drop zone.
 */
function pickFile(file) {
  selectedFile = file;

  // The file name is user-controlled: build the tag as a DOM node so it is never parsed as HTML.
  const tag = document.createElement("span");
  tag.className = "file-tag";
  tag.textContent = file.name;
  $("dropTitle").replaceChildren(tag);

  $("dropSub").textContent = `${(file.size / 1024).toFixed(1)} KB · ready`;
  runBtn.disabled = false;
  clearError();
  setStatus("idle", "ready");
}
| // Fetch the bundled sample over HTTP and turn it into a real File, so it's | |
| // uploaded like any user file. (Passing a URL to handle_file would make the | |
| // server re-download from its own origin, which its SSRF guard rejects.) | |
| async function pickSample(){ | |
| clearError(); | |
| setStatus("busy","loading sampleβ¦"); | |
| try { | |
| const resp = await fetch(SAMPLE_PATH, { cache: "no-store" }); | |
| if (!resp.ok) throw new Error("Could not load sample (HTTP " + resp.status + ")."); | |
| const blob = await resp.blob(); | |
| const file = new File([blob], "dirty_contacts.csv", { type: "text/csv" }); | |
| selectedFile = file; | |
| $("dropTitle").innerHTML = '<span class="file-tag">dirty_contacts.csv</span>'; | |
| $("dropSub").textContent = (file.size/1024).toFixed(1) + " KB Β· sample Β· ready"; | |
| runBtn.disabled = false; | |
| setStatus("idle","ready Β· sample"); | |
| } catch (err){ | |
| showError((err && err.message) ? err.message : String(err)); | |
| } | |
| } | |
// File chosen through the native picker.
fileInput.addEventListener("change", (e) => {
  const file = e.target.files && e.target.files[0];
  if (!file) return;
  pickFile(file);
  // Clear the control so that choosing the same file again (for example after
  // loading the sample in between) still fires "change" and re-selects it.
  e.target.value = "";
});

// The sample link is an <a role="button">: activate it on click and on Enter / Space.
sampleBtn.addEventListener("click", (e) => {
  e.preventDefault();
  pickSample();
});
sampleBtn.addEventListener("keydown", (e) => {
  if (e.key === "Enter" || e.key === " ") {
    e.preventDefault();
    pickSample();
  }
});
| /* drag & drop */ | |
// Highlight the drop zone while a drag is over it; preventDefault marks it as a valid drop target.
for (const type of ["dragenter", "dragover"]) {
  dropZone.addEventListener(type, (e) => {
    e.preventDefault();
    dropZone.classList.add("drag");
  });
}

dropZone.addEventListener("dragleave", (e) => {
  e.preventDefault();
  // dragleave also fires when the pointer moves onto a child of the zone; keep the highlight then.
  if (e.relatedTarget && dropZone.contains(e.relatedTarget)) return;
  dropZone.classList.remove("drag");
});

dropZone.addEventListener("drop", (e) => {
  e.preventDefault();
  dropZone.classList.remove("drag");
  const file = e.dataTransfer && e.dataTransfer.files && e.dataTransfer.files[0];
  if (!file) return;
  // The input's `accept` filter does not apply to dropped files, so enforce the same extensions here.
  if (!/\.(csv|xlsx|xls)$/i.test(file.name)) {
    showError("Unsupported file type. Drop a .csv, .xlsx or .xls file.");
    return;
  }
  pickFile(file);
});
| /* ---------- keyboard ---------- */ | |
/**
 * Tell whether `el` is a control in which the user types or picks a value with the
 * keyboard, so single-key shortcuts must not fire while it has focus.
 *
 * Inputs that take no typed text (file, checkbox, radio, button-like, range, color)
 * do not count: otherwise the file input, which keeps focus after a file is chosen,
 * would disable the bare shortcuts.
 *
 * @param {Element|null|undefined} el - Usually document.activeElement.
 * @returns {boolean}
 */
function isTextEntry(el) {
  if (!el) return false;
  if (el.isContentEditable) return true;

  const tag = el.tagName;
  if (tag === "TEXTAREA" || tag === "SELECT") return true;
  if (tag !== "INPUT") return false;

  const nonTextTypes = ["button", "checkbox", "color", "file", "image", "radio", "range", "reset", "submit"];
  const type = String(el.type || "text").toLowerCase();
  return !nonTextTypes.includes(type);
}
// Global shortcuts: Cmd/Ctrl+Enter runs the cleaner, "O" opens the file picker,
// and 1 / 2 / 3 switch the table view once results are visible.
document.addEventListener("keydown", (e) => {
  // A keydown event may arrive without a string `key`; ignore it rather than
  // throw on `.toLowerCase()` below.
  if (typeof e.key !== "string") return;

  const hasCommandKey = e.metaKey || e.ctrlKey;

  // Cmd/Ctrl+Enter: clean (safe even in text fields).
  if (hasCommandKey && e.key === "Enter") {
    e.preventDefault();
    if (!runBtn.disabled) run();
    return;
  }

  // Bare shortcuts never fire with a modifier held or while typing in a field.
  if (hasCommandKey || e.altKey || isTextEntry(document.activeElement)) return;

  if (e.key.toLowerCase() === "o") {
    e.preventDefault();
    fileInput.click();
    return;
  }

  if (!results.classList.contains("show")) return;
  switch (e.key) {
    case "1": setView("diff"); break;
    case "2": setView("before"); break;
    case "3": setView("after"); break;
    default: break;
  }
});
| /* ---------- run cleaning ---------- */ | |
runBtn.addEventListener("click", run);

/**
 * Upload the selected file to the backend's /clean_data endpoint and render the response.
 *
 * While the request is in flight the run button is disabled and shows a spinner, and the
 * previous results and download dock are hidden. Errors are logged to the console and
 * shown in the error box; the button is always restored afterwards.
 *
 * @returns {Promise<void>}
 */
async function run() {
  if (!selectedFile) return;

  clearError();
  runBtn.disabled = true;
  runLabel.innerHTML = '<span class="spin"></span> CLEANING';
  setStatus("busy", "cleaning…");
  results.classList.remove("show");
  dock.classList.remove("show");

  try {
    const client = await Client.connect(window.location.origin);
    const result = await client.predict("/clean_data", { file_path: handle_file(selectedFile) });
    // Guard the whole path to the payload so a malformed response gives the message
    // below instead of a bare TypeError.
    const data = result && Array.isArray(result.data) ? result.data[0] : undefined;
    if (!data || typeof data !== "object") {
      throw new Error("Unexpected response shape from /clean_data.");
    }
    lastResult = data;
    render(data);
    setStatus("ok", "done");
  } catch (err) {
    console.error(err);
    showError(err && err.message ? err.message : String(err));
  } finally {
    runBtn.disabled = false;
    runLabel.textContent = "▶ CLEAN IT";
  }
}
| /* ---------- schema accessors (match server.py response dict) ---------- */ | |
/** Preview rows before cleaning, or [] when the field is missing or not an array. */
function getBefore(d) {
  const { before } = d;
  return Array.isArray(before) ? before : [];
}

/** Preview rows after cleaning, or [] when the field is missing or not an array. */
function getAfter(d) {
  const { after } = d;
  return Array.isArray(after) ? after : [];
}

/** Copy of the backend's input column list; inferred from the "before" rows when absent or empty. */
function getColsBefore(d) {
  const declared = d.columns_before;
  const usable = Array.isArray(declared) && declared.length > 0;
  return usable ? declared.slice() : columnsOf(getBefore(d));
}

/** Copy of the backend's output column list; inferred from the "after" rows when absent or empty. */
function getColsAfter(d) {
  const declared = d.columns_after;
  const usable = Array.isArray(declared) && declared.length > 0;
  return usable ? declared.slice() : columnsOf(getAfter(d));
}

/** Row alignment from the backend, or null when none was sent. */
function getAlignment(d) {
  const { alignment } = d;
  return Array.isArray(alignment) ? alignment : null;
}

/** Change log entries from the backend, or [] when none were sent. */
function getChangeLog(d) {
  const log = d.change_log;
  return Array.isArray(log) ? log : [];
}
| /* ---------- render orchestrator ---------- */ | |
/**
 * Render a backend response: metric cards, summary line, report, cap note and table,
 * then reveal the results section and, when CSV text is present, the download dock.
 * @param {object} data - Response dict from the backend.
 */
function render(data) {
  renderMetrics(data);
  $("summaryTxt").textContent = data.summary || "Cleaning complete.";
  renderReport(data.report_md);
  renderCapNote(data);

  // The legend is hidden in the markup and setView() only reveals it on a view switch,
  // so sync it with the current view here or it stays hidden on the first DIFF render.
  legend.style.display = currentView === "diff" ? "flex" : "none";

  renderTable();
  results.classList.add("show");

  const csv = data.csv_text;
  if (typeof csv === "string" && csv.length > 0) {
    // Report the encoded byte size, not the character count.
    dockSize.textContent = `${(new Blob([csv]).size / 1024).toFixed(1)} KB`;
    dock.classList.add("show");
  } else {
    dock.classList.remove("show");
  }
}
/**
 * Show the "preview only" note when the output has more rows than are displayed,
 * either because the backend sent fewer rows than the total or because the
 * client-side RENDER_CAP truncates them. The note is hidden otherwise.
 * @param {object} data - Response dict from the backend.
 */
function renderCapNote(data) {
  const shownAfter = getAfter(data).length;
  const totalAfter = num(data.total_rows_after, shownAfter);

  if (totalAfter <= shownAfter && shownAfter <= RENDER_CAP) {
    capNote.style.display = "none";
    return;
  }

  const shown = Math.min(shownAfter, RENDER_CAP);
  capNote.style.display = "block";
  // Only numbers are interpolated here, so innerHTML is safe.
  capNote.innerHTML = `Preview · showing first <b>${shown}</b> of <b>${totalAfter.toLocaleString()}</b> output rows. Download for the full dataset.`;
}
/**
 * Fill the metric strip with four cards: rows out, columns, cells changed, rows removed.
 * Row totals come from the backend's total_rows_* fields, falling back to the preview
 * lengths; the changed-cell count covers the rendered preview only.
 * @param {object} data - Response dict from the backend.
 */
function renderMetrics(data) {
  const colsBefore = getColsBefore(data);
  const colsAfter = getColsAfter(data);
  const totalBefore = num(data.total_rows_before, getBefore(data).length);
  const totalAfter = num(data.total_rows_after, getAfter(data).length);
  const rowsDropped = Math.max(0, totalBefore - totalAfter);
  const colsDropped = Math.max(0, colsBefore.length - colsAfter.length);
  const { changed } = diffStats(data);

  const cards = [
    {
      k: "rows out",
      v: totalAfter.toLocaleString(),
      sub: rowsDropped
        ? `−${rowsDropped.toLocaleString()} from ${totalBefore.toLocaleString()}`
        : "no rows dropped",
      cls: "accent",
    },
    {
      k: "columns",
      v: colsAfter.length,
      sub: colsDropped ? `−${colsDropped} dropped` : "all kept",
      cls: "",
    },
    {
      k: "cells changed",
      v: changed.toLocaleString(),
      sub: "in preview",
      cls: changed ? "chg" : "",
    },
    {
      k: "rows removed",
      v: rowsDropped.toLocaleString(),
      sub: "dups / empties",
      cls: rowsDropped ? "del" : "",
    },
  ];

  // `cls` is one of the fixed literals above; all displayed text goes through escapeHtml.
  $("metrics").innerHTML = cards
    .map(
      (c) =>
        `<div class="metric"><div class="k">${escapeHtml(c.k)}</div>` +
        `<div class="v ${c.cls}">${escapeHtml(String(c.v))}</div>` +
        `<div class="sub">${escapeHtml(c.sub)}</div></div>`
    )
    .join("");
}
| /* ---------- diff computation (alignment-driven) ---------- */ | |
| // Build the list of row-pairings from the backend alignment, capped for render. | |
/**
 * Build the row pairings for the diff view, capped at RENDER_CAP entries.
 *
 * With a backend alignment, each op of type "pair", "removed" or "added" becomes one
 * entry; other op types and malformed entries (null, non-objects) are skipped.
 * Without an alignment, rows are paired by index over the overlap and the leftover
 * tail is reported as removed (before) or added (after).
 *
 * @param {object} data - Response dict from the backend.
 * @returns {Array<{kind: string, b: (object|null), a: (object|null), bi: (number|null), ai: (number|null)}>}
 */
function buildRows(data) {
  const before = getBefore(data);
  const after = getAfter(data);
  const align = getAlignment(data);
  const rows = [];
  const hasRoom = () => rows.length < RENDER_CAP;

  if (align) {
    for (const op of align) {
      if (!hasRoom()) break; // nothing past the cap is ever rendered
      if (!op || typeof op !== "object") continue; // tolerate malformed alignment entries
      switch (op.type) {
        case "pair":
          rows.push({ kind: "pair", b: before[op.b], a: after[op.a], bi: op.b, ai: op.a });
          break;
        case "removed":
          rows.push({ kind: "removed", b: before[op.b], a: null, bi: op.b, ai: null });
          break;
        case "added":
          rows.push({ kind: "added", b: null, a: after[op.a], bi: null, ai: op.a });
          break;
        default:
          break;
      }
    }
    return rows;
  }

  // Fallback: no alignment from backend — pair by index (overlap), then tail.
  const overlap = Math.min(before.length, after.length);
  for (let i = 0; i < overlap && hasRoom(); i++) {
    rows.push({ kind: "pair", b: before[i], a: after[i], bi: i, ai: i });
  }
  for (let i = overlap; i < before.length && hasRoom(); i++) {
    rows.push({ kind: "removed", b: before[i], a: null, bi: i, ai: null });
  }
  for (let i = overlap; i < after.length && hasRoom(); i++) {
    rows.push({ kind: "added", b: null, a: after[i], bi: null, ai: i });
  }
  return rows;
}
/**
 * Count changed cells across the paired rows produced by buildRows().
 * Only columns present in the output are compared; values are compared after norm().
 * @param {object} data - Response dict from the backend.
 * @returns {{changed: number}}
 */
function diffStats(data) {
  const pairedRows = buildRows(data).filter((row) => row.kind === "pair");
  // A Set drops repeated column names so each column is counted once per row.
  const outputCols = [...new Set(getColsAfter(data))];

  const changed = pairedRows.reduce((total, row) => {
    const prev = row.b || {};
    const next = row.a || {};
    const differing = outputCols.filter((col) => norm(prev[col]) !== norm(next[col]));
    return total + differing.length;
  }, 0);

  return { changed };
}
/** Convert a cell value to a string for comparison and display; null and undefined become "". */
function norm(v) {
  if (v === undefined || v === null) return "";
  return String(v);
}
/** Return `v` when it is a finite number, otherwise `fallback`. */
function num(v, fallback) {
  // Number.isFinite rejects non-numbers without coercing them.
  return Number.isFinite(v) ? v : fallback;
}
| /* ---------- table rendering ---------- */ | |
// Each tab button switches to the view named in its data-view attribute.
for (const tab of document.querySelectorAll(".tab")) {
  tab.addEventListener("click", () => {
    setView(tab.dataset.view);
  });
}
/**
 * Switch the data panel to view `v`, update the tab buttons' active class and
 * aria-selected state, show the legend only for the diff view, and re-render the table.
 * @param {string} v - "diff", "before" or "after".
 */
function setView(v) {
  currentView = v;

  for (const tab of document.querySelectorAll(".tab")) {
    const isActive = tab.dataset.view === v;
    tab.classList.toggle("active", isActive);
    tab.setAttribute("aria-selected", String(isActive));
  }

  const showLegend = v === "diff";
  legend.style.display = showLegend ? "flex" : "none";

  renderTable();
}
/** Re-render the data table for the current view; does nothing until a result has been received. */
function renderTable() {
  if (!lastResult) return;

  let markup;
  switch (currentView) {
    case "before":
      markup = plainTable(getBefore(lastResult), getColsBefore(lastResult));
      break;
    case "after":
      markup = plainTable(getAfter(lastResult), getColsAfter(lastResult));
      break;
    default:
      // Any other value, including "diff", shows the diff table.
      markup = diffTable(lastResult);
  }
  tableWrap.innerHTML = markup;
}
// Infer column order from row-dict keys (fallback only — prefer backend columns).
/**
 * @param {Array<object|null|undefined>} rows - Row dicts; null/undefined rows contribute no keys.
 * @returns {string[]} Every key seen, in first-seen order.
 */
function columnsOf(rows) {
  // A Set iterates in insertion order, which gives first-seen ordering for free.
  const ordered = new Set();
  for (const row of rows) {
    for (const key of Object.keys(row || {})) {
      ordered.add(key);
    }
  }
  return [...ordered];
}
/**
 * Render one table cell. Empty values (null, undefined, "") get a dimmed placeholder;
 * anything else is escaped and also placed in the title attribute, so text truncated
 * by the table's ellipsis styling can still be read on hover.
 * @param {*} val - Raw cell value.
 * @returns {string} A `<td>` element as HTML.
 */
function cellHtml(val) {
  const text = norm(val);
  if (text === "") return '<td class="cell-empty">∅</td>';
  const safe = escapeHtml(text);
  return `<td title="${safe}">${safe}</td>`;
}
/**
 * Render rows as a plain HTML grid.
 * @param {Array<object>} rows - row objects; at most RENDER_CAP are rendered.
 * @param {string[]} [cols] - column order; when missing or empty it is
 *   inferred from the rows via columnsOf().
 * @returns {string} a <table class="grid"> with a leading "#" column of
 *   1-based row numbers, or an "empty-note" <div> when there are no rows or
 *   no columns.
 */
function plainTable(rows, cols){
  if (!rows.length) return `<div class="empty-note">no rows</div>`;
  const columns = (cols && cols.length) ? cols : columnsOf(rows);
  if (!columns.length) return `<div class="empty-note">no columns</div>`;

  const headCells = columns.map(name => `<th>${escapeHtml(name)}</th>`).join("");
  const bodyRows = rows.slice(0, RENDER_CAP).map((row, idx) => {
    // A missing row object still yields one cell per column.
    const cells = columns.map(name => cellHtml(row ? row[name] : "")).join("");
    return `<tr><td class="rownum">${idx + 1}</td>${cells}</tr>`;
  }).join("");

  return `<table class="grid"><thead><tr><th>#</th>${headCells}</tr></thead><tbody>${bodyRows}</tbody></table>`;
}
/* Alignment-driven diff: rows come from buildRows(), which pairs them by the
   backend's content-based alignment (so dropped/deduped rows line up) rather
   than by naive index. Columns come from the backend's known column sets, so
   dropped/added columns are detected reliably rather than guessed.

   Returns an HTML string: a <table class="grid">, or an "empty-note" <div>
   when there is no data or no columns. Cell classes used for paired rows:
     d-del      value of a column that exists only before (dropped)
     d-add      value of a column that exists only after (new)
     d-chg      value that changed (old value, arrow, new value)
     cell-empty blank value placeholder */
function diffTable(data){
  const before = getBefore(data);
  const after = getAfter(data);
  if (!before.length && !after.length) return `<div class="empty-note">no data</div>`;
  const colsAfter = getColsAfter(data);
  const colsBefore = getColsBefore(data);
  const afterSet = new Set(colsAfter);
  const beforeSet = new Set(colsBefore);
  // Column order: kept/after columns first, then before-only (dropped) columns.
  const cols = [...colsAfter, ...colsBefore.filter(c => !afterSet.has(c))];
  if (!cols.length) return `<div class="empty-note">no columns</div>`;

  // Marker glyphs are written as \u escapes so the rendered text does not
  // depend on how this file's bytes are decoded (the previous literals had
  // been corrupted into a Greek beta).
  const EMPTY = "\u2205";    // empty-set sign: blank value
  const ARROW = "\u2192";    // rightwards arrow: old -> new
  const MINUS = "\u2212";    // minus sign: removed-row prefix, pairs with "+"
  // NOTE(review): the original dropped-column glyph was unrecoverable;
  // U+2715 (multiplication x) is a best guess -- confirm.
  const DROPPED = "\u2715";
  const emptyCell = cls => `<td class="${cls}">${EMPTY}</td>`;

  // Render one cell of a paired row for column c (b = before row, a = after row).
  const pairCell = (c, b, a) => {
    const bv = norm(b[c]);
    if (!afterSet.has(c)){
      // Column was dropped: only the old value exists.
      return bv === "" ? emptyCell("d-del cell-empty")
        : `<td class="d-del" title="${escapeHtml(bv)} (column dropped)">${escapeHtml(bv)}</td>`;
    }
    const av = norm(a[c]);
    if (!beforeSet.has(c)){
      // Column is new: only the new value exists.
      return av === "" ? emptyCell("d-add cell-empty")
        : `<td class="d-add" title="${escapeHtml(av)} (new column)">${escapeHtml(av)}</td>`;
    }
    if (bv === av){
      return av === "" ? emptyCell("cell-empty")
        : `<td title="${escapeHtml(av)}">${escapeHtml(av)}</td>`;
    }
    // Changed cell: show old value, arrow, new value.
    const oldDisp = bv === "" ? EMPTY : escapeHtml(bv);
    const newDisp = av === "" ? EMPTY : escapeHtml(av);
    return `<td class="d-chg" title="${escapeHtml(bv)} ${ARROW} ${escapeHtml(av)}">` +
      `<span class="old">${oldDisp}</span><span class="arrow">${ARROW}</span>${newDisp}</td>`;
  };

  const rows = buildRows(data);
  let h = '<table class="grid"><thead><tr><th>#</th>';
  h += cols.map(c => {
    const dropped = !afterSet.has(c);
    return `<th${dropped ? ' style="color:var(--del)"' : ''}>${escapeHtml(c)}${dropped ? ` ${DROPPED}` : ''}</th>`;
  }).join("");
  h += '</tr></thead><tbody>';

  let n = 0;   // 1-based display number, counted across every rendered row
  for (const r of rows){
    n++;
    if (r.kind === "removed"){
      const b = r.b || {};
      h += `<tr class="row-removed"><td class="rownum" title="removed">${MINUS}${n}</td>`;
      h += cols.map(c => cellHtml(b[c])).join("");
      h += '</tr>';
      continue;
    }
    if (r.kind === "added"){
      const a = r.a || {};
      h += `<tr class="row-added"><td class="rownum" title="added">+${n}</td>`;
      // An added row has no value for columns that exist only before.
      h += cols.map(c => afterSet.has(c) ? cellHtml(a[c]) : emptyCell("cell-empty")).join("");
      h += '</tr>';
      continue;
    }
    // Paired row: compare cell by cell.
    const b = r.b || {};
    const a = r.a || {};
    h += `<tr><td class="rownum">${n}</td>`;
    h += cols.map(c => pairCell(c, b, a)).join("");
    h += '</tr>';
  }
  return h + '</tbody></table>';
}
| /* ---------- minimal markdown renderer ---------- */ | |
/**
 * Render a Markdown report into reportBody.
 * Anything that is not a non-blank string is replaced by a fixed
 * "*No report returned.*" notice before conversion with mdToHtml().
 */
function renderReport(md){
  const hasText = typeof md === "string" && md.trim() !== "";
  reportBody.innerHTML = mdToHtml(hasText ? md : "*No report returned.*");
}
/**
 * Convert a small Markdown subset to HTML, one source line at a time.
 * Supported: ATX headings ("#" to "######", rendered as h1..h3), "-"/"*"
 * bullet lists, pipe tables (first kept row becomes the header), "---"
 * horizontal rules and single-line paragraphs. Inline markup of each
 * fragment is delegated to inline().
 * @param {string} md - Markdown source.
 * @returns {string} HTML fragments joined with newlines.
 */
function mdToHtml(md){
  const lines = md.replace(/\r\n/g, "\n").split("\n");
  const out = [];
  let inUl = false;
  let tableBuf = [];   // consecutive table lines awaiting flushTable()

  // Rows made only of spaces, colons, pipes and dashes are treated as
  // header separators and dropped.
  const SEPARATOR_ROW = /^\s*\|?[\s:|-]+\|?\s*$/;

  const flushUl = () => { if (inUl){ out.push("</ul>"); inUl = false; } };
  const flushTable = () => {
    if (!tableBuf.length) return;
    const rows = tableBuf.filter(r => !SEPARATOR_ROW.test(r));
    let t = "<table>";
    rows.forEach((r, idx) => {
      // Trim first: an indented row would otherwise keep its leading
      // whitespace, the outer "|" would not be stripped, and the row would
      // gain a spurious empty first cell.
      const cells = r.trim().replace(/^\||\|$/g, "").split("|").map(c => c.trim());
      const tag = idx === 0 ? "th" : "td";
      t += "<tr>" + cells.map(c => `<${tag}>${inline(c)}</${tag}>`).join("") + "</tr>";
    });
    t += "</table>";
    out.push(t);
    tableBuf = [];
  };

  for (const raw of lines){
    const line = raw.replace(/\s+$/, "");
    if (line.trim().startsWith("|")){
      flushUl();
      tableBuf.push(line);
      continue;
    }
    flushTable();   // any non-table line ends the current table
    if (/^#{1,6}\s/.test(line)){
      flushUl();
      const lvl = Math.min(line.match(/^#+/)[0].length, 3);   // cap at h3
      out.push(`<h${lvl}>${inline(line.replace(/^#+\s*/, ""))}</h${lvl}>`);
    } else if (/^\s*[-*]\s+/.test(line)){
      if (!inUl){ out.push("<ul>"); inUl = true; }
      out.push(`<li>${inline(line.replace(/^\s*[-*]\s+/, ""))}</li>`);
    } else if (/^\s*---+\s*$/.test(line)){
      flushUl();
      out.push("<hr/>");
    } else if (line.trim() === ""){
      flushUl();
    } else {
      flushUl();
      out.push(`<p>${inline(line)}</p>`);
    }
  }
  flushUl();
  flushTable();
  return out.join("\n");
}
/**
 * Render inline Markdown (`code`, **bold**, *italic*) for one text fragment.
 * The input is HTML-escaped first, so the result is safe to insert as HTML.
 * @param {string} s - raw text of a heading, list item, table cell or paragraph.
 * @returns {string} HTML string.
 */
function inline(s){
  const codeSpans = [];
  // Code spans are swapped for NUL-delimited placeholders before emphasis is
  // applied. Otherwise "*" inside code is rewritten, and two code spans that
  // each contain "*" produce mis-nested <em></code>...<code></em> markup.
  // Literal NULs are dropped first so input cannot forge a placeholder.
  let html = escapeHtml(s).replace(/\u0000/g, "").replace(/`([^`]+)`/g, (_, body) => {
    codeSpans.push(`<code>${body}</code>`);
    return `\u0000${codeSpans.length - 1}\u0000`;
  });
  html = html.replace(/\*\*([^*]+)\*\*/g, "<strong>$1</strong>");
  // italics: *text* not adjacent to another * (so it won't eat ** bold)
  html = html.replace(/(^|[^*])\*([^*\s][^*]*?)\*(?!\*)/g, "$1<em>$2</em>");
  return html.replace(/\u0000(\d+)\u0000/g, (marker, idx) => {
    const i = Number(idx);
    return i < codeSpans.length ? codeSpans[i] : marker;
  });
}
| /* ---------- download ---------- */ | |
// Save lastResult.csv_text as "scrubbed.csv" when the download button is clicked.
downloadBtn.addEventListener("click", () => {
  const csv = lastResult ? lastResult.csv_text : undefined;
  if (typeof csv !== "string" || csv.length === 0) return;

  const url = URL.createObjectURL(new Blob([csv], { type:"text/csv;charset=utf-8" }));
  // A temporary link is attached, clicked and removed to trigger the download.
  const link = document.createElement("a");
  link.href = url;
  link.download = "scrubbed.csv";
  document.body.appendChild(link);
  link.click();
  link.remove();
  // The object URL is released one second later, not immediately --
  // presumably so the browser can start the download first.
  setTimeout(() => { URL.revokeObjectURL(url); }, 1000);
});
| /* ---------- util ---------- */ | |
/**
 * Escape the five HTML-significant characters (& < > " ') so the result can
 * be placed safely in element content or a quoted attribute value.
 * @param {*} s - value to escape; coerced with String().
 * @returns {string} escaped text.
 */
function escapeHtml(s){
  // The replacement values must be the entity references themselves. The
  // previous map had been HTML-decoded into the raw characters, which made
  // the object literal a syntax error and would have escaped nothing.
  const ENTITIES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
    "'": "&#39;"
  };
  return String(s).replace(/[&<>"']/g, ch => ENTITIES[ch]);
}
| </script> | |
| </body> | |
| </html> | |