scrubdata / frontend /index.html
OpenAI Codex
deploy: add sponsor:openai tag (Best Use of Codex) + Codex-hardened build
16dc556
Raw
History Blame Contribute Delete
54.3 kB
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>ScrubData — clean spreadsheets, with the receipts</title>
<style>
:root{
--paper:#faf7f2; --card:#fffdfa; --ink:#23201c; --ink-soft:#6b6359;
--line:#ece5da; --accent:#2f6f5e; --accent-soft:#e7f1ec;
--done:#3f7d5f; --done-bg:#eef5ef; --done-line:#cfe3d4;
--call:#b06a1f; --call-bg:#fbf1e2; --call-line:#f0dcbf;
--flag:#7a7367; --flag-bg:#f3efe8;
--pii:#8a4baf; --pii-bg:#f3ecf9; --pii-line:#e2d3ef;
--shadow:0 1px 2px rgba(40,30,20,.04),0 8px 24px rgba(40,30,20,.06);
--r:15px;
}
*{box-sizing:border-box}
body{margin:0;background:var(--paper);color:var(--ink);
font-family:Inter,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif;
line-height:1.5;-webkit-font-smoothing:antialiased}
.wrap{max-width:780px;margin:0 auto;padding:0 22px}
a{color:var(--accent)}
.ribbon{background:var(--accent-soft);color:#234e42;font-size:13.5px;
text-align:center;padding:9px 16px;border-bottom:1px solid #d6e7df}
.ribbon b{font-weight:600}
header{padding:36px 0 6px}
.logo{display:flex;align-items:center;gap:9px;font-weight:700;font-size:18px;letter-spacing:-.2px}
.logo .mark{width:26px;height:26px;border-radius:8px;background:var(--accent);
display:grid;place-items:center;color:#fff;font-size:15px}
h1{font-size:29px;line-height:1.15;letter-spacing:-.6px;margin:20px 0 8px;font-weight:740}
.sub{color:var(--ink-soft);font-size:16px;max-width:600px}
/* upload */
.drop{margin:24px 0;background:var(--card);border:2px dashed var(--line);border-radius:18px;
padding:42px 24px;text-align:center;cursor:pointer;transition:border-color .15s, background .15s}
.drop:focus-visible,
.btn:focus-visible,
.samplelink:focus-visible,
.modeltoggle:focus-within,
.tie .opt:focus-visible,
.tie .keep:focus-visible{
outline:3px solid var(--accent);
outline-offset:3px;
}
.drop.drag{border-color:var(--accent);background:var(--accent-soft)}
.drop h3{margin:0 0 4px;font-size:18px;font-weight:700}
.drop p{margin:0;color:var(--ink-soft);font-size:14px}
.drop .picked{color:var(--accent);font-weight:650}
.actions-row{display:flex;gap:11px;align-items:center;justify-content:center;margin:6px 0 30px}
.btn{font:inherit;font-size:14.5px;font-weight:600;padding:11px 20px;border-radius:11px;
cursor:pointer;border:1px solid var(--line);background:#fff;color:var(--ink)}
.btn.primary{background:var(--accent);border-color:var(--accent);color:#fff}
.btn.primary:disabled{opacity:.45;cursor:not-allowed}
.btn.nudge{animation:nudge 1.6s ease}
@keyframes nudge{0%,100%{box-shadow:0 0 0 0 rgba(47,111,94,0)}
20%,60%{box-shadow:0 0 0 6px rgba(47,111,94,.18)}}
.samplelink{font-size:13.5px;color:var(--accent);cursor:pointer;text-decoration:underline}
/* hero "what cleaning means" demo strip */
.demo{margin:26px 0 6px;background:var(--card);border:1px solid var(--line);border-radius:18px;
padding:18px 22px;box-shadow:var(--shadow)}
.demolab{font-size:11.5px;letter-spacing:.07em;text-transform:uppercase;color:var(--ink-soft);
font-weight:700;margin-bottom:14px;text-align:center}
.demogrid{display:grid;grid-template-columns:1fr auto 1fr;gap:9px 14px;align-items:center;
font-family:"SF Mono",ui-monospace,Menlo,monospace;font-size:13.5px}
.demogrid .b{color:#9a8d7c;text-align:right}
.demogrid .b s{text-decoration-color:#dcc8be}
.demogrid .ar{color:var(--accent);text-align:center;font-size:15px}
.demogrid .a{color:var(--done);font-weight:650}
.demofoot{text-align:center;color:var(--ink-soft);font-size:13.5px;margin-top:16px;line-height:1.5}
.demofoot b{color:var(--ink);font-weight:650}
@media(max-width:520px){.demogrid{font-size:12px;gap:7px 8px}}
.quests{max-width:840px;margin:36px auto 8px;padding:0 22px}
.quests h4{font-size:12px;letter-spacing:.08em;text-transform:uppercase;
color:var(--ink-soft);margin:0 0 11px;font-weight:700;text-align:center}
.qgrid{display:grid;grid-template-columns:repeat(auto-fit,minmax(225px,1fr));gap:10px}
.quest{background:var(--card);border:1px solid var(--line);border-radius:12px;padding:12px 13px}
.quest .pill{display:inline-flex;align-items:center;gap:6px;font-size:11.5px;font-weight:700;
color:#234e42;background:var(--accent-soft);border-radius:999px;padding:3px 10px;margin-bottom:8px}
.quest p{margin:0;font-size:12.5px;color:var(--ink-soft);line-height:1.45}
.reslinks{max-width:760px;margin:30px auto 4px;padding:0 22px;text-align:center}
.reslinks h4{font-size:12px;letter-spacing:.08em;text-transform:uppercase;
color:var(--ink-soft);margin:0 0 9px;font-weight:700}
.reslinks a{display:inline-block;margin:4px 9px;font-size:13.5px;color:var(--accent);
text-decoration:none;border-bottom:1px solid var(--accent-soft)}
.reslinks a:hover{border-bottom-color:var(--accent)}
.modeltoggle{display:flex;align-items:center;gap:7px;font-size:13.5px;color:var(--ink);cursor:pointer;user-select:none}
.modeltoggle input{accent-color:var(--accent);width:15px;height:15px;cursor:pointer}
.modeltoggle .hint{color:var(--ink-soft);font-size:12.5px}
.err{display:none;background:#fdf0ee;border:1px solid #f0d4cd;color:#8a3a2b;border-radius:11px;
padding:11px 15px;margin:0 0 18px;font-size:14px}
.err.show{display:block}
.working{display:none;text-align:center;color:var(--ink-soft);margin:8px 0 26px;font-size:14.5px}
.working.show{display:block}
.spin{display:inline-block;width:13px;height:13px;border:2px solid var(--line);
border-top-color:var(--accent);border-radius:50%;animation:sp .7s linear infinite;
vertical-align:-2px;margin-right:7px}
@keyframes sp{to{transform:rotate(360deg)}}
/* results */
#results{display:none}
#results.show{display:block}
.filebar{display:flex;align-items:center;gap:12px;margin:22px 0 6px;background:var(--card);
border:1px solid var(--line);border-radius:var(--r);padding:14px 16px;box-shadow:var(--shadow)}
.fileicon{width:34px;height:34px;border-radius:9px;background:#eef4f1;color:var(--accent);
display:grid;place-items:center;font-size:16px;flex:none}
.filebar .nm{font-weight:600}
.filebar .meta{color:var(--ink-soft);font-size:13.5px}
.filebar .spacer{flex:1}
.pill-mini{font-size:12px;font-weight:600;color:var(--done);background:var(--done-bg);
border:1px solid var(--done-line);padding:3px 10px;border-radius:20px;white-space:nowrap}
section{margin:30px 0}
.eyebrow{font-size:12.5px;font-weight:700;letter-spacing:.06em;text-transform:uppercase;
color:var(--ink-soft);margin-bottom:12px}
.summary{background:var(--card);border:1px solid var(--line);border-radius:var(--r);
padding:6px 20px;box-shadow:var(--shadow);list-style:none;margin:0}
.summary li{padding:13px 0;border-bottom:1px solid var(--line);display:flex;gap:12px;
align-items:flex-start;font-size:15px}
.summary li:last-child{border-bottom:0}
.summary .ic{flex:none;margin-top:1px}
.summary b{font-weight:650}
.card{background:var(--card);border:1px solid var(--line);border-left-width:4px;
border-radius:var(--r);padding:16px 18px;margin:12px 0;box-shadow:var(--shadow)}
.card.done{border-left-color:var(--done)}
.card.call{border-left-color:var(--call)}
.card.pii{border-left-color:var(--pii)}
.card-top{display:flex;align-items:center;gap:10px;margin-bottom:4px;flex-wrap:wrap}
.card-title{font-weight:650;font-size:15px}
.pill{font-size:11.5px;font-weight:700;letter-spacing:.04em;padding:3px 9px;border-radius:20px;
margin-left:auto;flex:none}
.pill.done{color:var(--done);background:var(--done-bg);border:1px solid var(--done-line)}
.pill.call{color:var(--call);background:var(--call-bg);border:1px solid var(--call-line)}
.pill.pii{color:var(--pii);background:var(--pii-bg);border:1px solid var(--pii-line)}
.card-body{color:var(--ink-soft);font-size:14px}
.ba{display:grid;grid-template-columns:1fr auto 1fr;gap:10px;align-items:start;margin-top:12px}
.ba .col{background:#fbf9f5;border:1px solid var(--line);border-radius:11px;padding:10px 13px;min-width:0}
.ba .lab{font-size:11px;text-transform:uppercase;letter-spacing:.05em;color:var(--ink-soft);margin-bottom:5px}
.ba .val{font-size:13px;font-family:"SF Mono",ui-monospace,Menlo,monospace;overflow-wrap:anywhere}
.ba .was{color:#9a8d7c}
.ba .arrow{color:var(--accent);font-size:18px;align-self:center}
.badge-row{display:flex;gap:7px;flex-wrap:wrap;margin:4px 0 0}
.pii-badge{font-size:12px;font-weight:600;color:var(--pii);background:var(--pii-bg);
border:1px solid var(--pii-line);border-radius:8px;padding:2px 9px}
/* YOUR CALL hero — the abstention wow-moment */
.callhero{background:var(--call-bg);border:1px solid var(--call-line);border-radius:var(--r);
padding:14px 18px;margin:0 0 12px;color:#7a4a12;font-size:14.5px}
.callhero b{font-weight:700}
.tie{display:grid;grid-template-columns:auto 1fr;gap:8px 12px;align-items:center;margin-top:11px}
.tie .src{font-family:"SF Mono",ui-monospace,Menlo,monospace;font-weight:650;color:var(--call);
background:#fff;border:1px solid var(--call-line);border-radius:8px;padding:3px 9px;font-size:13px}
.tie .opts{display:flex;gap:7px;align-items:center;flex-wrap:wrap;font-size:13.5px}
.tie .opt{font:inherit;font-size:13.5px;background:#fff;border:1px solid var(--call-line);
border-radius:8px;padding:4px 11px;cursor:pointer;color:var(--ink);transition:all .12s}
.tie .opt:hover{background:var(--accent);border-color:var(--accent);color:#fff}
.tie .opt:hover .sc{color:#dff0e9}
.tie .opt .sc{color:#a9803f;font-size:11.5px;margin-left:5px}
.tie .keep{font:inherit;font-size:12.5px;background:none;border:0;color:var(--ink-soft);
cursor:pointer;text-decoration:underline;padding:4px 4px}
.tie .keep:hover{color:var(--ink)}
.tie .vs{color:#a9803f;font-weight:600;font-size:12px}
.tie .resolved{font-size:13px;color:var(--done);font-weight:600}
.tie .opt.sel{background:var(--accent);border-color:var(--accent);color:#fff;font-weight:650}
.tie .opt.sel .sc{color:#dff0e9}
.tie .keep.sel{color:var(--accent);font-weight:700}
/* fixed action bar: apply staged YOUR CALL decisions in one re-clean */
.ycbar{position:fixed;left:0;right:0;bottom:0;z-index:50;display:none;gap:14px;
align-items:center;justify-content:center;flex-wrap:wrap;background:var(--call-bg);
border-top:1px solid var(--call-line);padding:12px 18px;box-shadow:0 -4px 16px rgba(40,30,20,.10)}
.ycbar.show{display:flex}
.ycbar .txt{font-size:14px;color:#7a4a12}
body.has-ycbar{padding-bottom:68px}
/* preview table */
.tablebox{background:var(--card);border:1px solid var(--line);border-radius:var(--r);
box-shadow:var(--shadow);overflow:auto;max-height:380px}
table{border-collapse:collapse;width:100%;font-size:12.5px}
th{position:sticky;top:0;background:#f4efe7;text-align:left;padding:8px 10px;font-weight:650;
border-bottom:1px solid var(--line);white-space:nowrap}
td{padding:7px 10px;border-bottom:1px solid #f3eee6;font-family:"SF Mono",ui-monospace,Menlo,monospace;
font-size:12px;white-space:nowrap;max-width:260px;overflow:hidden;text-overflow:ellipsis}
td.chg{background:var(--done-bg)}
td.chg .old{color:#9a8d7c;text-decoration:line-through;margin-right:6px}
.capnote{color:var(--ink-soft);font-size:12.5px;margin:7px 2px}
/* audit */
.audit{display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:11px}
.audit .stat{background:var(--card);border:1px solid var(--line);border-radius:13px;
padding:13px 15px;box-shadow:var(--shadow)}
.audit .num{font-size:21px;font-weight:740;letter-spacing:-.4px}
.audit .lbl{font-size:12px;color:var(--ink-soft)}
/* download */
.download{background:linear-gradient(180deg,#fffdfa,#f7f2ea);border:1px solid var(--line);
border-radius:18px;padding:24px;text-align:center;box-shadow:var(--shadow)}
.download h3{margin:0 0 4px;font-size:18px;font-weight:720}
.download p{margin:0 0 16px;color:var(--ink-soft);font-size:14px}
.dl-row{display:flex;gap:11px;justify-content:center;flex-wrap:wrap}
.revert{margin-top:14px;font-size:13px;color:var(--ink-soft)}
footer{padding:28px 0 46px;text-align:center;color:#9a8d7c;font-size:13px;
border-top:1px solid var(--line);margin-top:34px}
.restart{display:inline-block;margin-top:18px;font-size:14px;color:var(--accent);
font-weight:600;cursor:pointer;text-decoration:underline}
/* ---- the ETA timer: size-aware, fun, well-mannered (never lies/completes early) ---- */
.etabar{position:fixed;left:0;top:0;height:3px;width:100%;z-index:60;display:none;background:transparent}
.etabar.show{display:block}
.etabar .fill{height:100%;width:0;border-radius:0 3px 3px 0;
background:linear-gradient(90deg,var(--accent),#5aa98c);transition:width .28s cubic-bezier(.4,0,.2,1);
box-shadow:0 0 10px rgba(47,111,94,.55)}
.etapill{position:fixed;top:13px;left:50%;transform:translateX(-50%) translateY(-6px);z-index:61;
display:none;opacity:0;align-items:center;gap:9px;background:var(--card);border:1px solid var(--line);
border-radius:999px;padding:8px 15px 8px 13px;box-shadow:var(--shadow);font-size:13.5px;
color:var(--ink);transition:opacity .2s,transform .2s;max-width:calc(100vw - 28px)}
.etapill.show{display:flex;opacity:1;transform:translateX(-50%) translateY(0)}
.etapill .dot{width:8px;height:8px;border-radius:50%;background:var(--accent);flex:none;
animation:etapulse 1.1s ease-in-out infinite}
.etapill .stage{font-weight:600;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}
.etapill .eta{color:var(--ink-soft);font-variant-numeric:tabular-nums;flex:none}
@keyframes etapulse{0%,100%{opacity:.3;transform:scale(.75)}50%{opacity:1;transform:scale(1.2)}}
/* Honor the OS "reduce motion" setting: stop every looping or decorative animation
   on the page (spinner, ETA pulse dot, run-button nudge) and drop the eased
   transitions on the ETA bar and pill so they change state instantly. */
@media (prefers-reduced-motion: reduce){
.etabar .fill{transition:none}
.etapill{transition:none}
.etapill .dot{animation:none}
.spin{animation:none}
.btn.nudge{animation:none}
}
/* ---- mobile: the desktop 3-col before/after grids cramp on narrow screens ---- */
@media (max-width: 600px){
.wrap{padding:0 14px}
h1{font-size:24px}
header{padding:24px 0 4px}
.sub{font-size:15px}
.ribbon{font-size:12.5px;padding:8px 12px}
.drop{padding:30px 16px}
.actions-row{flex-wrap:wrap}
.actions-row .btn{width:100%}
/* stack before -> after vertically (arrow rotates down) */
.ba{grid-template-columns:1fr;gap:8px}
.ba .arrow{transform:rotate(90deg);justify-self:center;margin:-2px 0}
/* YOUR CALL tie: stack the value above its options */
.tie{grid-template-columns:1fr;gap:6px}
.tie .src{justify-self:start}
.tie .opt{padding:7px 12px} /* bigger touch targets */
/* audit stats: 2-up instead of cramped auto-fit */
.audit{grid-template-columns:1fr 1fr;gap:8px}
.card{padding:14px 14px}
.download{padding:20px 16px}
.dl-row{flex-direction:column}
.dl-row .btn{width:100%}
section{margin:22px 0}
}
</style>
</head>
<body>
<div class="ribbon" id="ribbon">🔒 <b>Your data never leaves this machine.</b> The file is read and cleaned locally; your original is untouched — no upload, no API, no cloud.</div>
<div class="etabar" id="etaBar"><div class="fill" id="etaFill"></div></div>
<div class="etapill" id="etaPill"><span class="dot"></span><span class="stage" id="etaStage">Reading your file…</span><span class="eta" id="etaTime"></span></div>
<div class="wrap">
<header>
<div class="logo"><span class="mark">✦</span> ScrubData</div>
<h1 id="headline">Fix the messy text in your spreadsheet.</h1>
<p class="sub" id="subline">Misspelled names, phone numbers and emails in a dozen formats, cities
typed five different ways — I'll correct and standardize them, protect anything sensitive, and
show you exactly what I changed. <b>I never delete your data</b>; every change is reversible, and
the judgment calls are left to you.</p>
</header>
<!-- Error banner: hidden by CSS until showError() fills #errMsg and adds the "show" class.
     role="alert" makes assistive technology announce the message when it appears. -->
<div class="err" id="errBox" role="alert"><span id="errMsg"></span></div>
<div id="uploader">
<div class="demo">
<div class="demolab">What "cleaning" looks like — same rows, just fixed</div>
<div class="demogrid">
<span class="b"><s>nigeia</s></span><span class="ar">→</span><span class="a">Nigeria</span>
<span class="b"><s>Calfornia</s></span><span class="ar">→</span><span class="a">California</span>
<span class="b"><s>Ana@GMAIL.com&nbsp;</s></span><span class="ar">→</span><span class="a">ana@gmail.com</span>
<span class="b"><s>415.555.0192</s></span><span class="ar">→</span><span class="a">(415)&nbsp;555-0192</span>
</div>
<div class="demofoot">It <b>corrects and standardizes</b> messy text and removes duplicate rows —
it doesn't delete your data. New to this? <span class="samplelink" id="demoSampleBtn" tabindex="0" role="button" aria-label="Watch it run on a sample file">watch it run on a sample file →</span></div>
</div>
<div class="drop" id="dropZone" tabindex="0" role="button" aria-label="Choose a CSV or Excel file to clean" aria-describedby="dropSub">
<h3 id="dropTitle">Drop your export here</h3>
<p id="dropSub">CSV or Excel. I'll scan it for the usual mess — duplicates, blanks,
mismatched formats, stray spellings — and anything that looks like personal data.</p>
<input type="file" id="fileInput" accept=".csv,.xlsx,.xls,.tsv" hidden>
</div>
<div class="actions-row">
<button class="btn primary" id="runBtn" disabled aria-label="Clean the selected file"><span id="runLabel">Clean it up</span></button>
<span class="samplelink" id="sampleBtn" tabindex="0" role="button" aria-label="Try it on a messy sales export">try it on a messy sales export</span>
<span class="samplelink" id="sampleBtn2" tabindex="0" role="button" aria-label="Try it on an HR file with sensitive data">or an HR file with sensitive data</span>
</div>
<div class="actions-row" style="margin-top:-18px;gap:18px">
<label class="modeltoggle" id="modelToggleWrap" style="display:none" aria-label="Clean with the 4B model">
<input type="checkbox" id="modelToggle" checked aria-label="Clean with the 4B model"> <span>⚡ Clean with the 4B model <span class="hint" id="modelHint">(real fine-tune · ~1 min · uncheck for instant)</span></span>
</label>
<span class="samplelink" id="recipeLink" tabindex="0" role="button" aria-label="Apply a saved recipe to this file">↻ have a saved recipe? apply it to this file</span>
<input type="file" id="recipeInput" accept=".json" hidden>
</div>
<div class="working" id="working"><span class="spin"></span>Cleaning up — this runs locally, so it's quick.</div>
</div>
<div id="results">
<div class="filebar">
<div class="fileicon">▦</div>
<div>
<div class="nm" id="fileName">file.csv</div>
<div class="meta" id="fileMeta"></div>
</div>
<div class="spacer"></div>
<div class="pill-mini" id="fixPill"></div>
</div>
<section>
<div class="eyebrow">The summary, in plain English</div>
<ul class="summary" id="summaryList"></ul>
</section>
<section id="callSection" style="display:none">
<div class="eyebrow">The judgment calls — I stopped and asked instead of guessing</div>
<div class="callhero" id="callHero"></div>
<div id="callCards"></div>
</section>
<section id="piiSection" style="display:none">
<div class="eyebrow">Personal data, protected locally</div>
<div id="piiCards"></div>
</section>
<section>
<div class="eyebrow">Handled — already applied (and reversible)</div>
<div id="doneCards"></div>
</section>
<section>
<div class="eyebrow">Before → after preview</div>
<div class="tablebox" id="tableWrap"></div>
<div class="capnote" id="capNote"></div>
</section>
<section>
<div class="eyebrow">The audit trail (no silent edits, ever)</div>
<div class="audit" id="auditGrid"></div>
</section>
<section>
<div class="download">
<h3>Your clean copy is ready</h3>
<p>Take the cleaned file and the change log. Both are yours to keep.</p>
<div class="dl-row">
<button class="btn primary" id="downloadBtn">↓ Download clean file</button>
<button class="btn" id="logBtn">Export change log</button>
<button class="btn" id="recipeBtn">💾 Save cleaning recipe</button>
</div>
<div class="revert">Your original is untouched — every change above is a named,
reversible operation. <b>Save the recipe</b> and re-apply this exact cleaning to
next month's export in one click.</div>
</div>
<!-- "Clean another file" control. tabindex and role make it focusable and announced as a button;
     the shared samplelink class opts it into the script's ".samplelink[tabindex]" Enter/Space
     wiring and the .samplelink:focus-visible outline. The later .restart rule still wins on size. -->
<div style="text-align:center"><span class="restart samplelink" id="restartBtn" tabindex="0" role="button">← Clean another file</span></div>
</section>
</div>
</div>
<section class="quests" aria-label="Hackathon goals covered">
<h4>How this demo covers the challenge</h4>
<div class="qgrid">
<div class="quest"><span class="pill">🏔️ Tiny Titan</span>
<p>One 4-billion-param model plans every clean — nothing bigger, anywhere.</p></div>
<div class="quest"><span class="pill">🔌 Off the Grid</span>
<p>No third-party AI APIs — a local-runnable GGUF does the work (on-device when self-hosted).</p></div>
<div class="quest"><span class="pill">🎛️ Well-Tuned</span>
<p>Custom QLoRA fine-tune, trained on execution-verified data and published on the Hub.</p></div>
<div class="quest"><span class="pill">🎨 Off-Brand</span>
<p>Hand-built <code>gr.Server</code> interface — zero default Gradio chrome.</p></div>
<div class="quest"><span class="pill">🦙 Llama Champion</span>
<p>Served through llama.cpp as a Q8_0 GGUF.</p></div>
<div class="quest"><span class="pill">🤝 Sharing is Caring</span>
<p>Every run's agent traces are published as an open dataset on the Hub.</p></div>
<div class="quest"><span class="pill">📓 Field Notes</span>
<p>A full build report — the failures documented next to the wins.</p></div>
<div class="quest"><span class="pill">🏡 Backyard AI track</span>
<p>A hands-off cleaner for the non-coder with a messy Monday export.</p></div>
</div>
</section>
<nav class="reslinks" aria-label="Research and resources">
<h4>Research &amp; resources</h4>
<a href="https://www.loom.com/share/2fa868147527496e8097d82dd546d663" target="_blank" rel="noopener">🎬 Demo video</a>
<a href="https://github.com/ricalanis/scrubdata-hackathon" target="_blank" rel="noopener">💻 Code on GitHub</a>
<a href="https://huggingface.co/ricalanis/scrubdata-qwen3-4b" target="_blank" rel="noopener">🧠 Fine-tuned model</a>
<a href="https://huggingface.co/datasets/ricalanis/wildclean" target="_blank" rel="noopener">📊 WildClean dataset</a>
<a href="https://huggingface.co/datasets/build-small-hackathon/scrubdata-traces" target="_blank" rel="noopener">🔍 Agent traces</a>
<a href="https://huggingface.co/spaces/build-small-hackathon/scrubdata/blob/main/docs/FIELD_NOTES.md" target="_blank" rel="noopener">📓 Field notes</a>
<a href="https://huggingface.co/spaces/build-small-hackathon/scrubdata/blob/main/docs/paper/main.pdf" target="_blank" rel="noopener">📄 Preprint</a>
</nav>
<footer id="footerNote">Runs locally on a small model. Your data never leaves this machine.</footer>
<div class="ycbar" id="ycApplyBar">
<span class="txt">You've made <b id="ycApplyCount">0</b> <span id="ycApplyNoun">decisions</span>
— apply them and re-clean.</span>
<button class="btn primary" id="ycApplyBtn">✓ Clean now</button>
<button class="btn" id="ycClearBtn">Clear</button>
</div>
<script type="module">
import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client";
const $ = (id) => document.getElementById(id);
let selectedFile = null, lastResult = null, loadedRecipe = null, serverReady = false;
// Single place that decides the run button's enabled state and label.
// The button is enabled only when a file is selected AND the server has reported
// ready, so a first click cannot land on a cold reference-index build. The label
// reads "Warming up…" until the server is ready, then "Apply saved recipe" when a
// recipe is loaded, otherwise "Clean it up".
function refreshRun(){
  const runButton = $("runBtn");
  if (!runButton) return;                      // button not in the DOM: nothing to update
  runButton.disabled = !(selectedFile && serverReady);
  let label = "Clean it up";
  if (!serverReady) label = "Warming up…";     // a real ellipsis; the previous text was mis-encoded
  else if (loadedRecipe) label = "Apply saved recipe";
  $("runLabel").textContent = label;
}
// Poll the server's "/ready" endpoint (up to 30 tries, 1.5 s apart) and open the
// run-button gate as soon as it answers ready. Every failure is swallowed on
// purpose: if connecting fails or the tries run out, the gate is opened anyway.
(async function pollReady(){
  const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));
  try {
    const client = await Client.connect(window.location.origin);
    for (let attempt = 0; attempt < 30; attempt += 1){
      try {
        const reply = await client.predict("/ready");
        const first = reply && reply.data && reply.data[0];
        if (first && first.ready){
          serverReady = true;
          refreshRun();
          return;
        }
      } catch {}                               // one failed probe just means "try again"
      await pause(1500);
    }
  } catch {}                                   // could not connect at all
  // give up gating after ~45s; the timer is the backstop
  serverReady = true;
  refreshRun();
})();
// Path of the default bundled sample CSV (fetched by the sample links).
const SAMPLE_PATH = "/samples/maria_crm_export.csv";
// NOTE(review): not used in the visible part of this file; presumably a cap on
// rendered preview rows — confirm against the render code.
const RENDER_CAP = 120;
// ---- honest, deployment-aware copy. On a hosted HF Space the file is processed
// on HF's servers; only a self-hosted run is truly on-device. (server injects this)
// RT is the runtime descriptor read from window.__SCRUBDATA_RUNTIME__; when that
// global is absent, the fallback below describes a private, local, model-less run.
const RT = window.__SCRUBDATA_RUNTIME__ || { hosted:false, private:true,
planner:"deterministic planner", model_available:false, where:"this machine" };
// Swap in deployment-aware copy once, at load.
// - The model toggle is revealed only when a model is wired (it is checked by
//   default; unchecking selects the instant path) and its hint names where the
//   model runs.
// - On a hosted deployment the ribbon and footer are rewritten to say the file
//   is processed in the cloud. Both now describe model cleaning only when a
//   model is actually available; the footer used to claim it unconditionally.
// - On a local run nothing else changes: the static HTML already carries the
//   privacy-true wording.
// Only fixed strings are written through innerHTML; no user data is involved.
(function labelRuntime(){
  const hasModel = !!RT.model_available;
  const toggleWrap = $("modelToggleWrap");
  const hint = $("modelHint");
  if (hasModel && toggleWrap) toggleWrap.style.display = "";
  if (hasModel && hint){
    hint.textContent = RT.hosted
      ? "(real fine-tune on an A100 GPU · ~1 min · uncheck for instant)"
      : "(real fine-tune, on-device · ~1 min · uncheck for instant)";
  }
  if (!RT.hosted) return;                      // local default copy is already in the HTML

  const ribbonCore = hasModel
    ? "Cleans with the real <b>Qwen3-4B fine-tune</b> on an A100 GPU (~1 min; first run after idle ~2 min) — uncheck the box for an instant deterministic pass."
    : "Deterministic cleaning.";
  $("ribbon").innerHTML = "🌐 <b>Hosted demo on Hugging Face.</b> " + ribbonCore +
    " Your file is processed in the cloud (no third-party API, not stored); <b>clone &amp; run locally</b> to keep it on your machine.";

  const footerCore = hasModel
    ? "Cleans with the 4B fine-tune on a Modal GPU by default; "
    : "Deterministic cleaning in the cloud; ";
  $("footerNote").textContent = "Hosted demo on Hugging Face. " + footerCore +
    "clone & run locally for on-device cleaning — then your data never leaves your machine.";
})();
// pre-warm the scale-to-zero Modal GPU the moment the page loads, so it's hot by the
// time the user uploads + clicks — hides the ~60s cold start now that model is default.
// Runs only when a model is wired. It is fire-and-forget: the "/wake" call's result is
// ignored and any connection or prediction error is deliberately swallowed.
if (RT.model_available){
(async ()=>{ try { const c = await Client.connect(window.location.origin); await c.predict("/wake"); } catch(e){} })();
}
// True only when a model is wired AND the model toggle exists and is checked.
const useModelNow = () => {
  if (!RT.model_available) return false;
  const toggle = $("modelToggle");
  return !!(toggle && toggle.checked);
};
// Plain-English label builders, keyed by operation name. Each entry takes the
// operation object and returns one summary line; the only fields read are
// `mapping_size` (canonicalize_categories) and `pii_type` (the *_pii entries).
// A missing `mapping_size` reads as "several ... spellings"; a missing `pii_type`
// falls back to a generic phrase. Underscores in a PII type become spaces.
const OP_LABEL = (() => {
  // snake_case PII type -> words, with a fallback when the type is absent
  const piiWords = (o, fallback) => String(o.pii_type || fallback).replace(/_/g, " ");
  return {
    canonicalize_categories: (o) => {
      const plural = (o.mapping_size || 2) > 1 ? "s" : "";
      return `Unified ${o.mapping_size || "several"} inconsistent spelling${plural}`;
    },
    strip_whitespace: () => "Trimmed stray spaces",
    collapse_internal_whitespace: () => "Collapsed doubled spaces",
    // curly quotes restored; the previous literal was mis-encoded
    normalize_disguised_nulls: () => "Turned “N/A”, “-”, “null” into true blanks",
    parse_date: () => "Standardized dates to YYYY-MM-DD",
    parse_currency: () => "Parsed currency text into numbers",
    parse_number: () => "Parsed numeric text into numbers",
    parse_percent: () => "Parsed percentages into fractions",
    standardize_boolean: () => "Unified Yes/No values",
    standardize_phone: () => "Unified phone formats",
    normalize_email: () => "Tidied email addresses",
    standardize_case: () => "Fixed inconsistent casing",
    flag_pii: (o) => `Flagged ${piiWords(o, "personal data")}`,
    mask_pii: (o) => `Masked ${piiWords(o, "sensitive values")} (kept just enough to recognize)`,
    hash_pii: (o) => `Hashed ${piiWords(o, "sensitive values")}`,
    pseudonymize_pii: (o) => `Pseudonymized ${piiWords(o, "values")} (joins still work)`,
  };
})();
// Fixed plain-English labels for the three table-level operations, keyed by
// operation name. Unlike OP_LABEL these are plain strings, not functions.
const TABLE_OP_LABEL = {
drop_empty_rows: "Removed fully-empty rows",
drop_empty_columns: "Dropped columns with no data",
drop_exact_duplicates: "Removed exact duplicate rows",
};
// Write message `m` into the error banner (as text) and reveal the banner.
function showError(m){
  const messageNode = $("errMsg");
  messageNode.textContent = m;
  const banner = $("errBox");
  banner.classList.add("show");
}
// Hide the error banner; its last message text is left in place.
function clearError(){
  const banner = $("errBox");
  banner.classList.remove("show");
}
// Make `f` the file to clean and reflect it in the drop zone.
//   f: a File (from the picker, a drop, or a fetched sample). A missing value is ignored.
// The title becomes the file name, inserted as text so a hostile name cannot
// inject markup; the subtitle shows the size in KB. The run button state is then
// recomputed and any visible error cleared.
function pickFile(f){
  if (!f) return;                              // nothing chosen: keep the current selection
  selectedFile = f;
  const nameTag = document.createElement("span");
  nameTag.className = "picked";
  nameTag.textContent = f.name;
  const title = $("dropTitle");
  title.textContent = "";
  title.appendChild(nameTag);
  // "·" separator restored; the previous literal was mis-encoded
  $("dropSub").textContent = (f.size/1024).toFixed(1) + " KB · ready";
  refreshRun(); clearError();
}
// Fetch a bundled sample and select it as if the user had picked it.
//   path: URL of the sample, fetched with the HTTP cache bypassed
//   name: file name given to the resulting File (typed text/csv)
// Any failure (network error or non-2xx status) is shown in the error banner
// rather than thrown, so callers can await this without a try/catch.
async function pickSample(path, name){
  clearError();
  try {
    const response = await fetch(path, { cache: "no-store" });
    if (!response.ok) throw new Error("Could not load sample (HTTP " + response.status + ")");
    const sample = new File([await response.blob()], name, { type: "text/csv" });
    pickFile(sample);
    // overwrite pickFile's subtitle to mark this as a sample ("·" restored from mis-encoded text)
    $("dropSub").textContent = (sample.size/1024).toFixed(1) + " KB · sample · ready";
  } catch (e){ showError(e.message || String(e)); }
}
// A file chosen through the native picker becomes the selected file.
$("fileInput").addEventListener("change", (evt) => {
  const chosen = evt.target.files[0];
  if (chosen) pickFile(chosen);
});
// The drop zone behaves like a button: click, Enter, or Space opens the picker.
$("dropZone").addEventListener("click", () => { $("fileInput").click(); });
$("dropZone").addEventListener("keydown", (evt) => {
  if (evt.key !== "Enter" && evt.key !== " ") return;
  evt.preventDefault();
  $("fileInput").click();
});
// The two sample links load a bundled CSV.
$("sampleBtn").addEventListener("click", () => { pickSample(SAMPLE_PATH, "maria_crm_export.csv"); });
$("sampleBtn2").addEventListener("click", () => { pickSample("/samples/hr_payroll.csv", "hr_payroll.csv"); });
// Focusable .samplelink elements are spans with role="button", so Enter and Space
// have to be mapped to a click by hand.
for (const link of document.querySelectorAll(".samplelink[tabindex]")){
  link.addEventListener("keydown", (evt) => {
    if (evt.key !== "Enter" && evt.key !== " ") return;
    evt.preventDefault();
    link.click();
  });
}
// hero "watch it run" link: load the default sample, then scroll the run button
// into view and pulse it. The "nudge" class is removed after 1600 ms, matching
// the 1.6s CSS animation.
$("demoSampleBtn").addEventListener("click", async () => {
  await pickSample(SAMPLE_PATH, "maria_crm_export.csv");
  const runButton = $("runBtn");
  runButton.scrollIntoView({ behavior: "smooth", block: "center" });
  runButton.classList.add("nudge");
  setTimeout(() => { runButton.classList.remove("nudge"); }, 1600);
});
// Drag-and-drop on the drop zone: highlight while a drag is over it, clear the
// highlight when the drag leaves or ends in a drop, and select the first dropped
// file. preventDefault on all four events keeps the browser from opening the file.
{
  const zone = $("dropZone");
  for (const type of ["dragenter", "dragover"]){
    zone.addEventListener(type, (evt) => { evt.preventDefault(); zone.classList.add("drag"); });
  }
  for (const type of ["dragleave", "drop"]){
    zone.addEventListener(type, (evt) => { evt.preventDefault(); zone.classList.remove("drag"); });
  }
  zone.addEventListener("drop", (evt) => {
    const dropped = evt.dataTransfer.files[0];
    if (dropped) pickFile(dropped);
  });
}
// "Clean another file": hide the results panel, clear the uploader's inline
// display override, put the default headline back, and scroll to the top.
$("restartBtn").addEventListener("click", () => {
  $("results").classList.remove("show");
  $("uploader").style.display = "";
  $("headline").textContent = "Fix the messy text in your spreadsheet.";
  window.scrollTo({ top: 0, behavior: "smooth" });
});
// ---- save / re-apply cleaning recipe (the "Monday ritual")
// The link opens a hidden .json picker. A chosen file is read as text and must
// parse as JSON; the raw text (not the parsed value) is kept in `loadedRecipe`
// and sent as-is by run().
// On failure the previous recipe is dropped AND the UI is reset to match: the
// link returns to its default wording and the run label is recomputed. Before,
// a failed load left a stale "recipe loaded" link and "Apply saved recipe" label.
// The input is cleared afterwards so choosing the same file again still fires "change".
$("recipeLink").addEventListener("click", () => $("recipeInput").click());
$("recipeInput").addEventListener("change", async (e) => {
  const input = e.target;
  const f = input.files[0];
  if (!f) return;
  try {
    const text = await f.text();
    JSON.parse(text);                          // validation only: throws on non-JSON
    loadedRecipe = text;
    $("recipeLink").textContent = "↻ recipe loaded: " + f.name + " — Clean it up to apply";
    clearError();
  } catch {
    loadedRecipe = null;
    $("recipeLink").textContent = "↻ have a saved recipe? apply it to this file";
    showError("That doesn't look like a saved recipe (.json).");
  } finally {
    refreshRun();                              // label must follow loadedRecipe on both paths
    input.value = "";
  }
});
$("runBtn").addEventListener("click", run);
// ---- the ETA timer: a size-aware, well-mannered progress bar + status pill. It
// eases toward our time estimate, rotates playful stage labels, holds at ~92% if
// the run overruns (never completes before the real result), and snaps to 100% on
// finish. The estimate adapts to file size AND whether the model path is active.
// Shared state for startTimer()/stopTimer():
//   _etaRAF   - handle of the pending requestAnimationFrame callback
//   _etaStart - performance.now() reading taken when the timer started
//   _etaEst   - estimated run time in ms, from _estimateMs()
//   _etaMode  - "model" or "det"; selects the stage and overrun wording
//   _etaDone  - false while running (fill capped at 92%), true once stopped
let _etaRAF = 0, _etaStart = 0, _etaEst = 1, _etaMode = "det", _etaDone = false;
// Estimated run time in milliseconds for a file of `bytes` bytes.
// A falsy size (0, undefined, NaN) is treated as 30000 bytes.
//   model path:         55 s base + 45 ms per KB, capped at 135 s
//   deterministic path: 450 ms base + 12 ms per KB, never under 500 ms
// Per the measurements noted in the source: the model path on the live A100
// (format=json) runs warm in ~45-80s and cold in ~122s end-to-end; the progress
// bar is expected to hold past the estimate on a cold start.
function _estimateMs(bytes, useModel){
  const sizeKb = (bytes || 30000) / 1024;
  if (useModel){
    const modelMs = 55000 + sizeKb * 45;
    return modelMs > 135000 ? 135000 : modelMs;
  }
  const detMs = 450 + sizeKb * 12;
  return detMs < 500 ? 500 : detMs;
}
// Stage label for progress fraction `p` (0..1), chosen by the current _etaMode.
// Each mode has ascending thresholds; the first one `p` falls under wins, and
// anything past the last threshold (or a NaN `p`) gets the final label.
// The ellipses are real "…" characters; the previous literals were mis-encoded.
function _stage(p){
  const steps = _etaMode === "model"
    ? [[0.12, "Reading your file…"],
       [0.30, "Profiling the columns…"],
       [0.52, "Warming up the model…"],
       [0.80, "Asking the model about the tricky ones…"]]
    : [[0.15, "Reading your file…"],
       [0.45, "Profiling the columns…"],
       [0.75, "Spotting the messy bits…"]];
  for (const [limit, label] of steps){
    if (p < limit) return label;
  }
  return "Writing the cleaning plan…";
}
// Label shown once elapsed time `el` (ms) has passed the estimate, so the pill
// never sits on a static stage.
//   model mode: alternates between two messages every 12 s of elapsed time
//   det mode:   an overrun ≈ a cold first clean, so say the server is warming up
// The ellipses are real "…" characters; the previous literals were mis-encoded.
function _overrunLabel(el){
  if (_etaMode !== "model") return "Warming up the server… (first run)";
  const oddSlot = Math.floor(el / 12000) % 2;
  return oddSlot ? "The model's weighing the tricky values…" : "Almost there…";
}
// Show the progress bar + status pill and start the animation loop.
//   bytes    - size of the file being cleaned (feeds the time estimate)
//   useModel - truthy when the model path is active (longer estimate, model labels)
// The loop reschedules itself every frame; stopTimer() is what cancels it.
function startTimer(bytes, useModel){
  _etaMode = useModel ? "model" : "det";
  _etaEst = _estimateMs(bytes, useModel);
  _etaStart = performance.now();
  _etaDone = false;

  // keep the pill off the ribbon copy
  const ribbon = $("ribbon");
  if (ribbon) $("etaPill").style.top = `${ribbon.offsetHeight + 8}px`;
  $("etaBar").classList.add("show");
  $("etaPill").classList.add("show");

  // drop any loop left over from a previous run before starting a new one
  cancelAnimationFrame(_etaRAF);

  function frame(){
    const elapsed = performance.now() - _etaStart;
    // ease-out toward 1
    let progress = 1 - Math.pow(2, -elapsed / (_etaEst * 0.42));
    // hold until the real result
    if (!_etaDone && progress > 0.92) progress = 0.92;
    $("etaFill").style.width = `${(progress * 100).toFixed(1)}%`;

    const overran = elapsed > _etaEst;
    $("etaStage").textContent = overran ? _overrunLabel(elapsed) : _stage(progress);

    const secondsLeft = Math.ceil((_etaEst - elapsed) / 1000);
    $("etaTime").textContent = (!overran && secondsLeft > 0) ? `~${secondsLeft}s` : "";

    _etaRAF = requestAnimationFrame(frame);
  }
  frame();
}
// Finish the progress animation: cancel the frame loop, snap the bar to 100%,
// then hide the bar + pill after a short (320 ms) delay.
// The delayed hide is skipped when startTimer() has been called again in the
// meantime (it resets _etaDone to false); otherwise a stale timeout from the
// previous run would hide and zero the bar of the run that just started.
function stopTimer(){
  _etaDone = true;
  cancelAnimationFrame(_etaRAF);
  $("etaFill").style.width = "100%";
  setTimeout(()=>{
    if (!_etaDone) return; // a newer run owns the bar now
    $("etaBar").classList.remove("show");
    $("etaPill").classList.remove("show");
    $("etaFill").style.width = "0";
  }, 320);
}
// Click handler for the main button: send the selected file to the backend and
// render the response. With a loaded recipe the saved plan is replayed through
// /clean_with_plan; otherwise /clean_data produces a fresh plan. Failures are
// logged and shown via showError(); the button state and timer are always
// restored in the finally block.
async function run(){
  if (!selectedFile) return;
  clearError();
  // model only on a fresh clean, not recipe replay
  const um = useModelNow() && !loadedRecipe;
  $("runBtn").disabled = true;
  startTimer(selectedFile.size, um);
  try {
    const client = await Client.connect(window.location.origin);
    let res;
    if (loadedRecipe){
      res = await client.predict("/clean_with_plan", {
        file_path: handle_file(selectedFile),
        plan_json: loadedRecipe,
      });
    } else {
      res = await client.predict("/clean_data", {
        file_path: handle_file(selectedFile),
        use_model: um,
      });
    }
    const data = res.data[0];
    if (!data || typeof data !== "object"){
      throw new Error("Unexpected response from the cleaner.");
    }
    lastResult = data;
    render(data);
  } catch (e){
    console.error(e);
    showError(e.message || String(e));
  } finally {
    refreshRun();
    stopTimer();
  }
}
// ---- interactive YOUR CALL: STAGE decisions, then apply them all in one re-clean.
// pendingPicks key = column + NUL + raw -> {col, raw, canon} (canon null = keep as-is)
// Staged decisions live here until they are applied, cleared, or a new result is rendered.
let pendingPicks = {};
// Build the pendingPicks key for one (column, raw value) pair.
// NOTE(review): presumably NUL was chosen as a separator that cannot occur in a
// column name or cell value — confirm against the backend's input handling.
const _pk = (col, raw) => col + "\u0000" + raw;
// Delegated click handler for every YOUR CALL button. A button's data-yc index
// points into window._ycItems; clicking stages that choice, clicking the
// already-staged choice un-stages it, and clicking a different choice for the
// same value replaces the staged one.
$("callCards").addEventListener("click", evt => {
  const hit = evt.target.closest("[data-yc]");
  const choice = hit ? (window._ycItems || [])[+hit.dataset.yc] : null;
  if (!choice) return;
  const slot = _pk(choice.col, choice.raw);
  const staged = pendingPicks[slot];
  if (staged && staged.canon === choice.canon){
    delete pendingPicks[slot];   // toggle off
  } else {
    pendingPicks[slot] = choice; // select / switch
  }
  paintYcSelections();
});
// Sync the YOUR CALL UI with pendingPicks: mark each staged button with the
// "sel" class, and show/hide the apply bar together with its decision count.
function paintYcSelections(){
  const targets = window._ycItems || [];
  for (const btn of document.querySelectorAll('#callCards [data-yc]')){
    const target = targets[+btn.dataset.yc];
    if (!target) continue;
    const staged = pendingPicks[_pk(target.col, target.raw)];
    btn.classList.toggle('sel', !!staged && staged.canon === target.canon);
  }
  const stagedCount = Object.keys(pendingPicks).length;
  const anyStaged = stagedCount > 0;
  $("ycApplyBar").classList.toggle("show", anyStaged);
  document.body.classList.toggle("has-ycbar", anyStaged);
  $("ycApplyCount").textContent = stagedCount;
  $("ycApplyNoun").textContent = stagedCount === 1 ? "decision" : "decisions";
}
// "Clear": drop every staged decision and repaint (which also hides the apply bar).
$("ycClearBtn").addEventListener("click", ()=>{ pendingPicks = {}; paintYcSelections(); });
// "Apply": fold the staged decisions into the plan and re-clean the file.
$("ycApplyBtn").addEventListener("click", applyYourCalls);
// Return a deep copy of `planRaw` with the staged YOUR CALL decisions folded in.
//   planRaw - the plan object from the last result (left unmodified)
//   picks   - array of {col, raw, canon}; canon null means "keep the value as-is"
// Every decided value is removed from the column's review flags (and from the
// flag's candidate table). A pick with a canon is additionally written into the
// column's canonicalize_categories mapping, creating the column entry and the
// operation when they do not exist yet. Review flags of type
// uncertain_canonicalization / suspect_values that end up with no values are dropped.
function _planWithPicks(planRaw, picks){
  const plan = JSON.parse(JSON.stringify(planRaw));
  picks.forEach(({col, raw, canon})=>{
    (plan.flags || []).forEach(fl=>{
      if (fl.column === col && Array.isArray(fl.values)){
        fl.values = fl.values.filter(x => x !== raw);
        if (fl.candidates) delete fl.candidates[raw];
      }
    });
    if (!canon) return; // keep as-is: nothing to map
    plan.columns = plan.columns || [];
    let c = plan.columns.find(x => x.name === col);
    if (!c){
      c = {name: col, detected_semantic_type: "categorical", operations: []};
      plan.columns.push(c);
    }
    c.operations = c.operations || [];
    let op = c.operations.find(o => o.op === "canonicalize_categories");
    if (!op){
      op = {op: "canonicalize_categories", mapping: {}, rationale: "Your decisions (resolved from review)."};
      c.operations.push(op);
    }
    op.mapping = op.mapping || {};
    op.mapping[raw] = canon;
  });
  plan.flags = (plan.flags || []).filter(fl =>
    !(Array.isArray(fl.values) && fl.values.length === 0 &&
      (fl.issue === "uncertain_canonicalization" || fl.issue === "suspect_values")));
  return plan;
}

// Apply all staged YOUR CALL decisions in one re-clean: rewrite the last plan,
// replay it against the selected file through /clean_with_plan, render the new
// result and scroll to the review section (or to the top when it is hidden).
// Does nothing when there are no staged picks, no previous plan, or no file.
// Errors are logged and shown via showError(); the button and timer are always
// restored in the finally block.
async function applyYourCalls(){
  const picks = Object.values(pendingPicks);
  if (!picks.length || !lastResult || !lastResult.plan_raw || !selectedFile) return;
  const plan = _planWithPicks(lastResult.plan_raw, picks);
  const apply = $("ycApplyBtn");
  apply.disabled = true;
  apply.textContent = "cleaning…";
  clearError();
  startTimer(selectedFile.size, false); // replay applies a saved plan — no model call
  try {
    const client = await Client.connect(window.location.origin);
    const res = await client.predict("/clean_with_plan",
      { file_path: handle_file(selectedFile), plan_json: JSON.stringify(plan) });
    const data = res.data[0];
    if (!data || typeof data !== "object") throw new Error("Couldn't apply your decisions.");
    lastResult = data;
    render(data); // render() resets pendingPicks + repaints
    const sec = $("callSection");
    window.scrollTo({
      top: sec && sec.style.display !== "none" ? Math.max(0, sec.offsetTop - 16) : 0,
      behavior: "smooth",
    });
  } catch (e){
    console.error(e);
    showError(e.message || String(e));
  } finally {
    apply.disabled = false;
    apply.textContent = "✓ Clean now"; // was a mis-encoded check mark
    stopTimer();
  }
}
// Escape a value for interpolation into HTML markup — both element text AND
// quoted attribute values.
//   s - any value; null/undefined become "", everything else goes through String()
// Returns the escaped string. Quotes are escaped as well: the previous
// textContent -> innerHTML round-trip left `"` and `'` untouched, so a cell value
// containing a quote could break out of an attribute such as aria-label="…".
// U+00A0 is emitted as &nbsp; to match what innerHTML serialisation produced.
function esc(s){
  if (s == null) return "";
  const entities = {
    "&": "&amp;", "<": "&lt;", ">": "&gt;",
    '"': "&quot;", "'": "&#39;", "\u00a0": "&nbsp;",
  };
  return String(s).replace(/[&<>"'\u00a0]/g, ch => entities[ch]);
}
// Collect up to `max` [before, after] pairs for column `col`, taken from rows
// whose value changed (compared as strings).
// Rows are matched by index, which is only meaningful when the row count is
// unchanged (mask/format ops never drop rows); otherwise an empty list is returned.
function columnExamples(data, col, max=2){
  const pairs = [];
  if (data.total_rows_before !== data.total_rows_after) return pairs;
  const rowCount = Math.min(data.before.length, data.after.length);
  let row = 0;
  while (row < rowCount){
    const was = data.before[row][col];
    const now = data.after[row][col];
    row += 1;
    if (String(was) === String(now)) continue;
    pairs.push([was, now]);
    if (pairs.length >= max) break;
  }
  return pairs;
}
// Paint the results view for one cleaner response: file meta line, plain-English
// summary, PII cards, DONE cards, YOUR CALL cards, preview table and audit grid.
// Side effects beyond the DOM: resets pendingPicks (a fresh analysis carries no
// staged decisions) and republishes the YOUR CALL resolution targets on
// window._ycItems for the delegated click handler.
//   data - response object; fields read here: elapsed_ms, plan_raw,
//          total_rows_before/after, columns_before/after, change_log,
//          plan_columns, flags, pii_alerts, before, after, monitor.
// Every value that originates from the file or the backend goes through esc()
// before it is interpolated into markup.
function render(data){
  // tolerate a plan column without an operations array
  const opsOf = c => (c && c.operations) || [];
  const plural = n => (n === 1 ? "" : "s");
  // attribute-safe escaping: also neutralises double quotes so a cell value
  // cannot close the surrounding aria-label="…" (no-op if esc() already did it)
  const escAttr = v => esc(v).replace(/"/g, "&quot;");
  const isMaskOp = o => ["mask_pii","hash_pii","pseudonymize_pii"].includes(o.op);
  const isPiiOp = o => (o.op||"").includes("pii");

  $("uploader").style.display = "none";
  $("headline").textContent = "Done. Here's what changed.";
  $("results").classList.add("show");
  $("fileName").textContent = selectedFile ? selectedFile.name : "your file";
  const secs = data.elapsed_ms != null ? (data.elapsed_ms/1000) : null;
  const timeStr = secs == null ? "" : (secs < 1 ? ` · in ${Math.round(data.elapsed_ms)}ms` : ` · in ${secs.toFixed(1)}s`);
  const whereStr = RT.hosted ? "cleaned on HF's servers" : "cleaned on-device";
  const gen = (data.plan_raw && data.plan_raw._generated_by) || "";
  const plannerLabel = /model:/i.test(gen) ? "Qwen3-4B fine-tune" : "deterministic planner";
  $("fileMeta").textContent = `${data.total_rows_before.toLocaleString()} rows × ${data.columns_before.length} columns · ${whereStr} · ${plannerLabel}${timeStr}`;
  const nChanges = (data.change_log||[]).length;
  $("fixPill").textContent = `${nChanges} change${plural(nChanges)} applied`;

  // ---- plain-English summary
  const sl = $("summaryList"); sl.innerHTML = "";
  const items = [];
  const cols = data.plan_columns || [];
  const flags = data.flags || [];
  const canonCols = cols.filter(c => opsOf(c).some(o=>o.op==="canonicalize_categories"));
  const totalMapped = canonCols.reduce((n,c)=>n + opsOf(c).filter(o=>o.op==="canonicalize_categories")
    .reduce((m,o)=>m+(o.mapping_size||0),0), 0);
  if (canonCols.length) items.push(["🗂️", `<b>Unified ${totalMapped} inconsistent spelling${plural(totalMapped)}</b> across ${canonCols.length} column${plural(canonCols.length)} (${esc(canonCols.map(c=>c.name).join(", "))}).`]);
  const fmtOps = ["parse_date","parse_currency","parse_percent","standardize_phone","standardize_boolean","normalize_email","standardize_case"];
  const fmtCols = cols.filter(c => opsOf(c).some(o=>fmtOps.includes(o.op)));
  if (fmtCols.length) items.push(["📅", `<b>Standardized formats</b> (dates, numbers, phones, emails) in ${fmtCols.length} column${plural(fmtCols.length)}.`]);
  const nullCols = cols.filter(c => opsOf(c).some(o=>o.op==="normalize_disguised_nulls"));
  if (nullCols.length) items.push(["⬜", `<b>Treated disguised blanks</b> (“N/A”, “—”, “null”) as truly empty in ${nullCols.length} column${plural(nullCols.length)}, so counts and filters behave.`]);
  const maskCols = cols.filter(c => opsOf(c).some(isMaskOp));
  const flagPii = cols.filter(c => opsOf(c).some(o=>o.op==="flag_pii") && !maskCols.includes(c));
  // NOTE(review): this line promises "nothing left this machine" even when RT.hosted
  // is true and the meta line above says "cleaned on HF's servers" — confirm the copy.
  if (maskCols.length) items.push(["🛡️", `<b>Protected ${maskCols.length} sensitive column${plural(maskCols.length)}</b> (${esc(maskCols.map(c=>c.name).join(", "))}) — masked locally, nothing left this machine.`]);
  if (flagPii.length) items.push(["🔍", `<b>Spotted personal data</b> in ${esc(flagPii.map(c=>c.name).join(", "))} — flagged, not changed.`]);
  const tableOpsSeen = new Set();
  (data.change_log||[]).forEach(e=>{ if (e && e.op && TABLE_OP_LABEL[e.op]) tableOpsSeen.add(e.op); });
  tableOpsSeen.forEach(op => items.push(["🧹", `<b>${TABLE_OP_LABEL[op]}</b>.`]));
  if (flags.length) items.push(["✋", `<span style="color:var(--call)"><b>${flags.length} thing${plural(flags.length)} need${flags.length===1?"s":""} your judgment</b> — left untouched below.</span>`]);
  if (!items.length) items.push(["✨", "This file was already in good shape — nothing needed changing."]);
  items.forEach(([ic, html])=>{
    const li = document.createElement("li");
    li.innerHTML = `<span class="ic">${ic}</span><div>${html}</div>`;
    sl.appendChild(li);
  });

  // ---- PII cards
  const piiCards = $("piiCards"); piiCards.innerHTML = "";
  const piiCols = cols.filter(c => opsOf(c).some(isPiiOp));
  const piiAlerts = data.pii_alerts || [];
  $("piiSection").style.display = (piiCols.length || piiAlerts.length) ? "" : "none";
  // embedded-PII alerts: cards/SSNs buried in free-text columns (review, not auto-masked)
  piiAlerts.forEach(a=>{
    const ptype = String(a.pii_type||"sensitive data").replace(/_/g," ");
    const card = document.createElement("div");
    card.className = "card pii";
    card.innerHTML = `
      <div class="card-top"><span class="card-title">${esc(a.column)}</span>
      <span class="badge-row"><span class="pii-badge">${esc(ptype)}</span></span>
      <span class="pill pii">REVIEW</span></div>
      <div class="card-body">Found <b>${esc(a.count)}</b> ${esc(ptype)}-shaped value${plural(a.count)}
      buried inside this free-text column (e.g. <code>${esc(a.example||"")}</code>) — they slipped
      past column-level detection. Left as-is so your notes aren't mangled; <b>review before sharing</b>.</div>`;
    piiCards.appendChild(card);
  });
  piiCols.forEach(c=>{
    const ops = opsOf(c).filter(isPiiOp);
    const masked = ops.find(isMaskOp);
    // String(): same coercion the alert cards above apply to pii_type
    const ptype = String(ops[0].pii_type||"personal data").replace(/_/g," ");
    const ex = masked ? columnExamples(data, c.name) : [];
    const card = document.createElement("div");
    card.className = "card pii";
    card.innerHTML = `
      <div class="card-top"><span class="card-title">${esc(c.name)}</span>
      <span class="badge-row"><span class="pii-badge">${esc(ptype)}</span></span>
      <span class="pill pii">${masked ? "PROTECTED" : "FLAGGED"}</span></div>
      <div class="card-body">${masked
        ? esc(masked.rationale || "Masked locally; the original file is untouched.")
        : "Contains " + esc(ptype) + " — flagged for your awareness, values left exactly as they were."}</div>
      ${ex.length ? `<div class="ba">
        <div class="col"><div class="lab">Before</div>${ex.map(([b])=>`<div class="val was">${esc(b)}</div>`).join("")}</div>
        <div class="arrow">→</div>
        <div class="col"><div class="lab">After</div>${ex.map(([,a])=>`<div class="val">${esc(a)}</div>`).join("")}</div>
      </div>` : ""}`;
    piiCards.appendChild(card);
  });

  // ---- DONE cards (non-PII applied ops)
  const doneCards = $("doneCards"); doneCards.innerHTML = "";
  cols.forEach(c=>{
    const ops = opsOf(c).filter(o=>!isPiiOp(o));
    if (!ops.length) return;
    const canon = ops.find(o=>o.op==="canonicalize_categories");
    const sample = canon && canon.mapping_sample ? Object.entries(canon.mapping_sample).slice(0,3) : [];
    const card = document.createElement("div");
    card.className = "card done";
    card.innerHTML = `
      <div class="card-top"><span class="card-title">${esc(c.name)}</span><span class="pill done">DONE</span></div>
      <div class="card-body">${ops.map(o=>esc((OP_LABEL[o.op]||(()=>o.op))(o))).join(" · ")}</div>
      ${sample.length ? `<div class="ba">
        <div class="col"><div class="lab">Before</div>${sample.map(([b])=>`<div class="val was">${esc(b)}</div>`).join("")}</div>
        <div class="arrow">→</div>
        <div class="col"><div class="lab">After</div>${sample.map(([,a])=>`<div class="val">${esc(a)}</div>`).join("")}</div>
      </div>` : ""}`;
    doneCards.appendChild(card);
  });
  if (!doneCards.children.length)
    doneCards.innerHTML = `<div class="card done"><div class="card-body">${
      tableOpsSeen.size
        ? "No column fixes were needed beyond the table-level tidy-up."
        : "No automatic fixes were needed here."
    }</div></div>`;

  // ---- YOUR CALL (review flags) — the abstention hero
  const callCards = $("callCards"); callCards.innerHTML = "";
  const callHero = $("callHero"); callHero.innerHTML = "";
  $("callSection").style.display = flags.length ? "" : "none";
  // merge flags by column, and per value keep the RICHEST candidate list — the
  // backend can emit two flags for one column (one carrying the candidate ties,
  // one bare); without this merge the bare "no close match" card buries the real
  // tie (e.g. Slovia → Slovakia 86% vs Slovenia 86%).
  const byCol = new Map();
  flags.forEach(f=>{
    const col = f.column || "";
    if (!byCol.has(col)) byCol.set(col, {col, rationale:"", vals:new Map()});
    const e = byCol.get(col);
    const cands = f.candidates || {};
    const hasCands = Object.keys(cands).length > 0;
    // a flag that carries candidates wins the rationale; otherwise first one in
    if ((f.rationale||f.issue) && (!e.rationale || hasCands)) e.rationale = f.rationale || f.issue;
    (f.values||[]).forEach(v=>{
      const cs = cands[v] || [];
      const prev = e.vals.get(v);
      if (prev === undefined || cs.length > prev.length) e.vals.set(v, cs);
    });
  });
  const merged = [...byCol.values()];
  if (merged.length){
    let tieCount = 0, total = 0;
    merged.forEach(m => m.vals.forEach(cs => { total++; if ((cs||[]).length>=2) tieCount++; }));
    callHero.innerHTML = `<b>I left ${total} value${plural(total)} alone on purpose.</b> ` +
      (tieCount ? `${tieCount} ${tieCount===1?"is a genuine toss-up":"are genuine toss-ups"} — close enough that guessing would be a coin flip. ` : "") +
      `A wrong "fix" is worse than no fix — so <b>you decide: tap a match to apply it</b>, or keep the value as-is.`;
  }
  pendingPicks = {}; // fresh analysis -> no staged decisions carried over
  const ycItems = []; // resolution targets, indexed by data-yc (injection-safe)
  merged.forEach(m=>{
    const card = document.createElement("div");
    card.className = "card call";
    const col = m.col;
    const vals = [...m.vals.entries()].slice(0,12);
    // render each uncertain value with clickable candidate buttons + keep-as-is
    const rowsHtml = vals.map(([v, csAll])=>{
      const cs = (csAll||[]).slice(0,2);
      let opts = cs.map((c,i)=>{
        const idx = ycItems.push({col, raw:v, canon:c.canon}) - 1;
        return `${i?'<span class="vs">or</span>':''}<button type="button" class="opt" data-yc="${idx}" aria-label="Use ${escAttr(c.canon)} for ${escAttr(v)}">${esc(c.canon)}<span class="sc">${(c.score*100).toFixed(0)}%</span></button>`;
      }).join("");
      if (!cs.length) opts = `<span class="vs">no close match</span>`;
      const keepIdx = ycItems.push({col, raw:v, canon:null}) - 1;
      opts += `<button type="button" class="keep" data-yc="${keepIdx}" aria-label="Keep ${escAttr(v)} as is">keep “${esc(v)}”</button>`;
      return `<div class="src">${esc(v)}</div><div class="opts">${opts}</div>`;
    }).join("");
    card.innerHTML = `
      <div class="card-top"><span class="card-title">${esc(col)}</span><span class="pill call">YOUR CALL</span></div>
      <div class="card-body">${esc(m.rationale||"Left for review.")}</div>
      ${rowsHtml ? `<div class="tie">${rowsHtml}</div>` : ""}`;
    callCards.appendChild(card);
  });
  window._ycItems = ycItems;
  paintYcSelections(); // hide the action bar on a fresh render

  // ---- preview table (after, changed cells highlighted, old value struck through)
  const wrap = $("tableWrap");
  const colsAfter = data.columns_after || [];
  const n = Math.min(data.after.length, RENDER_CAP);
  // before/after rows can only be compared by index when no rows were dropped
  const aligned = data.total_rows_before === data.total_rows_after;
  let html = "<table><thead><tr>" + colsAfter.map(c=>`<th scope="col">${esc(c)}</th>`).join("") + "</tr></thead><tbody>";
  for (let i=0; i<n; i++){
    html += "<tr>" + colsAfter.map(c=>{
      const a = data.after[i] ? data.after[i][c] : "";
      const b = aligned && data.before[i] ? data.before[i][c] : a;
      if (String(a) !== String(b))
        return `<td class="chg"><span class="old">${esc(b)}</span>${esc(a)}</td>`;
      return `<td>${esc(a)}</td>`;
    }).join("") + "</tr>";
  }
  wrap.innerHTML = html + "</tbody></table>";
  $("capNote").textContent = data.total_rows_after > n
    ? `Showing the first ${n} of ${data.total_rows_after.toLocaleString()} rows — the download has everything.` : "";

  // ---- audit grid (monitorability)
  const monitor = data.monitor || {};
  const audit = $("auditGrid"); audit.innerHTML = "";
  [["columns_touched","columns touched"],["canonicalizations","values unified"],
   ["grounded_columns","reference-grounded"],["abstentions","abstained → review"],
   ["pii_columns_protected","PII columns protected"],
   ["changes_applied","changes logged"],["silent_edits","silent edits"]]
    .forEach(([k,lbl])=>{
      // a missing metric shows as a dash rather than "undefined"
      const v = (monitor[k] === null || monitor[k] === undefined) ? "—" : monitor[k];
      const d = document.createElement("div");
      d.className = "stat";
      d.innerHTML = `<div class="num">${esc(v)}</div><div class="lbl">${lbl}</div>`;
      audit.appendChild(d);
    });
}
// Hand `text` to the browser as a file download.
//   text     - file contents
//   mime     - MIME type recorded on the Blob
//   filename - suggested name (the anchor's download attribute)
// A temporary <a> is attached, clicked and removed; the object URL is revoked
// one second later so the download has time to start.
function _saveTextAs(text, mime, filename){
  const url = URL.createObjectURL(new Blob([text], {type: mime}));
  const a = document.createElement("a");
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  a.remove();
  setTimeout(()=>URL.revokeObjectURL(url), 1000);
}
// Download the cleaned CSV (no-op until a result with csv_text exists).
$("downloadBtn").addEventListener("click", ()=>{
  if (!lastResult || !lastResult.csv_text) return;
  _saveTextAs(lastResult.csv_text, "text/csv;charset=utf-8", "scrubbed.csv");
});
// Download the audit trail: change log, plan columns, flags and monitor stats.
$("logBtn").addEventListener("click", ()=>{
  if (!lastResult) return;
  const payload = JSON.stringify({change_log:lastResult.change_log, plan_columns:lastResult.plan_columns,
    flags:lastResult.flags, monitor:lastResult.monitor}, null, 2);
  _saveTextAs(payload, "application/json", "scrubdata-changelog.json");
});
// Download the raw plan as a reusable recipe (no-op without a plan).
$("recipeBtn").addEventListener("click", ()=>{
  const recipe = lastResult && lastResult.plan_raw;
  if (!recipe) return;
  _saveTextAs(JSON.stringify(recipe, null, 2), "application/json", "scrubdata-recipe.json");
});
</script>
</body>
</html>