Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>ScrubData — clean spreadsheets, with the receipts</title> | |
| <style> | |
| :root{ | |
| --paper:#faf7f2; --card:#fffdfa; --ink:#23201c; --ink-soft:#6b6359; | |
| --line:#ece5da; --accent:#2f6f5e; --accent-soft:#e7f1ec; | |
| --done:#3f7d5f; --done-bg:#eef5ef; --done-line:#cfe3d4; | |
| --call:#b06a1f; --call-bg:#fbf1e2; --call-line:#f0dcbf; | |
| --flag:#7a7367; --flag-bg:#f3efe8; | |
| --pii:#8a4baf; --pii-bg:#f3ecf9; --pii-line:#e2d3ef; | |
| --shadow:0 1px 2px rgba(40,30,20,.04),0 8px 24px rgba(40,30,20,.06); | |
| --r:15px; | |
| } | |
| *{box-sizing:border-box} | |
| body{margin:0;background:var(--paper);color:var(--ink); | |
| font-family:Inter,-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif; | |
| line-height:1.5;-webkit-font-smoothing:antialiased} | |
| .wrap{max-width:780px;margin:0 auto;padding:0 22px} | |
| a{color:var(--accent)} | |
| .ribbon{background:var(--accent-soft);color:#234e42;font-size:13.5px; | |
| text-align:center;padding:9px 16px;border-bottom:1px solid #d6e7df} | |
| .ribbon b{font-weight:600} | |
| header{padding:36px 0 6px} | |
| .logo{display:flex;align-items:center;gap:9px;font-weight:700;font-size:18px;letter-spacing:-.2px} | |
| .logo .mark{width:26px;height:26px;border-radius:8px;background:var(--accent); | |
| display:grid;place-items:center;color:#fff;font-size:15px} | |
| h1{font-size:29px;line-height:1.15;letter-spacing:-.6px;margin:20px 0 8px;font-weight:740} | |
| .sub{color:var(--ink-soft);font-size:16px;max-width:600px} | |
| /* upload */ | |
| .drop{margin:24px 0;background:var(--card);border:2px dashed var(--line);border-radius:18px; | |
| padding:42px 24px;text-align:center;cursor:pointer;transition:border-color .15s, background .15s} | |
| .drop:focus-visible, | |
| .btn:focus-visible, | |
| .samplelink:focus-visible, | |
| .modeltoggle:focus-within, | |
| .tie .opt:focus-visible, | |
| .tie .keep:focus-visible{ | |
| outline:3px solid var(--accent); | |
| outline-offset:3px; | |
| } | |
| .drop.drag{border-color:var(--accent);background:var(--accent-soft)} | |
| .drop h3{margin:0 0 4px;font-size:18px;font-weight:700} | |
| .drop p{margin:0;color:var(--ink-soft);font-size:14px} | |
| .drop .picked{color:var(--accent);font-weight:650} | |
| .actions-row{display:flex;gap:11px;align-items:center;justify-content:center;margin:6px 0 30px} | |
| .btn{font:inherit;font-size:14.5px;font-weight:600;padding:11px 20px;border-radius:11px; | |
| cursor:pointer;border:1px solid var(--line);background:#fff;color:var(--ink)} | |
| .btn.primary{background:var(--accent);border-color:var(--accent);color:#fff} | |
| .btn.primary:disabled{opacity:.45;cursor:not-allowed} | |
| .btn.nudge{animation:nudge 1.6s ease} | |
| @keyframes nudge{0%,100%{box-shadow:0 0 0 0 rgba(47,111,94,0)} | |
| 20%,60%{box-shadow:0 0 0 6px rgba(47,111,94,.18)}} | |
| .samplelink{font-size:13.5px;color:var(--accent);cursor:pointer;text-decoration:underline} | |
| /* hero "what cleaning means" demo strip */ | |
| .demo{margin:26px 0 6px;background:var(--card);border:1px solid var(--line);border-radius:18px; | |
| padding:18px 22px;box-shadow:var(--shadow)} | |
| .demolab{font-size:11.5px;letter-spacing:.07em;text-transform:uppercase;color:var(--ink-soft); | |
| font-weight:700;margin-bottom:14px;text-align:center} | |
| .demogrid{display:grid;grid-template-columns:1fr auto 1fr;gap:9px 14px;align-items:center; | |
| font-family:"SF Mono",ui-monospace,Menlo,monospace;font-size:13.5px} | |
| .demogrid .b{color:#9a8d7c;text-align:right} | |
| .demogrid .b s{text-decoration-color:#dcc8be} | |
| .demogrid .ar{color:var(--accent);text-align:center;font-size:15px} | |
| .demogrid .a{color:var(--done);font-weight:650} | |
| .demofoot{text-align:center;color:var(--ink-soft);font-size:13.5px;margin-top:16px;line-height:1.5} | |
| .demofoot b{color:var(--ink);font-weight:650} | |
| @media(max-width:520px){.demogrid{font-size:12px;gap:7px 8px}} | |
| .quests{max-width:840px;margin:36px auto 8px;padding:0 22px} | |
| .quests h4{font-size:12px;letter-spacing:.08em;text-transform:uppercase; | |
| color:var(--ink-soft);margin:0 0 11px;font-weight:700;text-align:center} | |
| .qgrid{display:grid;grid-template-columns:repeat(auto-fit,minmax(225px,1fr));gap:10px} | |
| .quest{background:var(--card);border:1px solid var(--line);border-radius:12px;padding:12px 13px} | |
| .quest .pill{display:inline-flex;align-items:center;gap:6px;font-size:11.5px;font-weight:700; | |
| color:#234e42;background:var(--accent-soft);border-radius:999px;padding:3px 10px;margin-bottom:8px} | |
| .quest p{margin:0;font-size:12.5px;color:var(--ink-soft);line-height:1.45} | |
| .reslinks{max-width:760px;margin:30px auto 4px;padding:0 22px;text-align:center} | |
| .reslinks h4{font-size:12px;letter-spacing:.08em;text-transform:uppercase; | |
| color:var(--ink-soft);margin:0 0 9px;font-weight:700} | |
| .reslinks a{display:inline-block;margin:4px 9px;font-size:13.5px;color:var(--accent); | |
| text-decoration:none;border-bottom:1px solid var(--accent-soft)} | |
| .reslinks a:hover{border-bottom-color:var(--accent)} | |
| .modeltoggle{display:flex;align-items:center;gap:7px;font-size:13.5px;color:var(--ink);cursor:pointer;user-select:none} | |
| .modeltoggle input{accent-color:var(--accent);width:15px;height:15px;cursor:pointer} | |
| .modeltoggle .hint{color:var(--ink-soft);font-size:12.5px} | |
| .err{display:none;background:#fdf0ee;border:1px solid #f0d4cd;color:#8a3a2b;border-radius:11px; | |
| padding:11px 15px;margin:0 0 18px;font-size:14px} | |
| .err.show{display:block} | |
| .working{display:none;text-align:center;color:var(--ink-soft);margin:8px 0 26px;font-size:14.5px} | |
| .working.show{display:block} | |
| .spin{display:inline-block;width:13px;height:13px;border:2px solid var(--line); | |
| border-top-color:var(--accent);border-radius:50%;animation:sp .7s linear infinite; | |
| vertical-align:-2px;margin-right:7px} | |
| @keyframes sp{to{transform:rotate(360deg)}} | |
| /* results */ | |
| #results{display:none} | |
| #results.show{display:block} | |
| .filebar{display:flex;align-items:center;gap:12px;margin:22px 0 6px;background:var(--card); | |
| border:1px solid var(--line);border-radius:var(--r);padding:14px 16px;box-shadow:var(--shadow)} | |
| .fileicon{width:34px;height:34px;border-radius:9px;background:#eef4f1;color:var(--accent); | |
| display:grid;place-items:center;font-size:16px;flex:none} | |
| .filebar .nm{font-weight:600} | |
| .filebar .meta{color:var(--ink-soft);font-size:13.5px} | |
| .filebar .spacer{flex:1} | |
| .pill-mini{font-size:12px;font-weight:600;color:var(--done);background:var(--done-bg); | |
| border:1px solid var(--done-line);padding:3px 10px;border-radius:20px;white-space:nowrap} | |
| section{margin:30px 0} | |
| .eyebrow{font-size:12.5px;font-weight:700;letter-spacing:.06em;text-transform:uppercase; | |
| color:var(--ink-soft);margin-bottom:12px} | |
| .summary{background:var(--card);border:1px solid var(--line);border-radius:var(--r); | |
| padding:6px 20px;box-shadow:var(--shadow);list-style:none;margin:0} | |
| .summary li{padding:13px 0;border-bottom:1px solid var(--line);display:flex;gap:12px; | |
| align-items:flex-start;font-size:15px} | |
| .summary li:last-child{border-bottom:0} | |
| .summary .ic{flex:none;margin-top:1px} | |
| .summary b{font-weight:650} | |
| .card{background:var(--card);border:1px solid var(--line);border-left-width:4px; | |
| border-radius:var(--r);padding:16px 18px;margin:12px 0;box-shadow:var(--shadow)} | |
| .card.done{border-left-color:var(--done)} | |
| .card.call{border-left-color:var(--call)} | |
| .card.pii{border-left-color:var(--pii)} | |
| .card-top{display:flex;align-items:center;gap:10px;margin-bottom:4px;flex-wrap:wrap} | |
| .card-title{font-weight:650;font-size:15px} | |
| .pill{font-size:11.5px;font-weight:700;letter-spacing:.04em;padding:3px 9px;border-radius:20px; | |
| margin-left:auto;flex:none} | |
| .pill.done{color:var(--done);background:var(--done-bg);border:1px solid var(--done-line)} | |
| .pill.call{color:var(--call);background:var(--call-bg);border:1px solid var(--call-line)} | |
| .pill.pii{color:var(--pii);background:var(--pii-bg);border:1px solid var(--pii-line)} | |
| .card-body{color:var(--ink-soft);font-size:14px} | |
| .ba{display:grid;grid-template-columns:1fr auto 1fr;gap:10px;align-items:start;margin-top:12px} | |
| .ba .col{background:#fbf9f5;border:1px solid var(--line);border-radius:11px;padding:10px 13px;min-width:0} | |
| .ba .lab{font-size:11px;text-transform:uppercase;letter-spacing:.05em;color:var(--ink-soft);margin-bottom:5px} | |
| .ba .val{font-size:13px;font-family:"SF Mono",ui-monospace,Menlo,monospace;overflow-wrap:anywhere} | |
| .ba .was{color:#9a8d7c} | |
| .ba .arrow{color:var(--accent);font-size:18px;align-self:center} | |
| .badge-row{display:flex;gap:7px;flex-wrap:wrap;margin:4px 0 0} | |
| .pii-badge{font-size:12px;font-weight:600;color:var(--pii);background:var(--pii-bg); | |
| border:1px solid var(--pii-line);border-radius:8px;padding:2px 9px} | |
| /* YOUR CALL hero β the abstention wow-moment */ | |
| .callhero{background:var(--call-bg);border:1px solid var(--call-line);border-radius:var(--r); | |
| padding:14px 18px;margin:0 0 12px;color:#7a4a12;font-size:14.5px} | |
| .callhero b{font-weight:700} | |
| .tie{display:grid;grid-template-columns:auto 1fr;gap:8px 12px;align-items:center;margin-top:11px} | |
| .tie .src{font-family:"SF Mono",ui-monospace,Menlo,monospace;font-weight:650;color:var(--call); | |
| background:#fff;border:1px solid var(--call-line);border-radius:8px;padding:3px 9px;font-size:13px} | |
| .tie .opts{display:flex;gap:7px;align-items:center;flex-wrap:wrap;font-size:13.5px} | |
| .tie .opt{font:inherit;font-size:13.5px;background:#fff;border:1px solid var(--call-line); | |
| border-radius:8px;padding:4px 11px;cursor:pointer;color:var(--ink);transition:all .12s} | |
| .tie .opt:hover{background:var(--accent);border-color:var(--accent);color:#fff} | |
| .tie .opt:hover .sc{color:#dff0e9} | |
| .tie .opt .sc{color:#a9803f;font-size:11.5px;margin-left:5px} | |
| .tie .keep{font:inherit;font-size:12.5px;background:none;border:0;color:var(--ink-soft); | |
| cursor:pointer;text-decoration:underline;padding:4px 4px} | |
| .tie .keep:hover{color:var(--ink)} | |
| .tie .vs{color:#a9803f;font-weight:600;font-size:12px} | |
| .tie .resolved{font-size:13px;color:var(--done);font-weight:600} | |
| .tie .opt.sel{background:var(--accent);border-color:var(--accent);color:#fff;font-weight:650} | |
| .tie .opt.sel .sc{color:#dff0e9} | |
| .tie .keep.sel{color:var(--accent);font-weight:700} | |
| /* fixed action bar: apply staged YOUR CALL decisions in one re-clean */ | |
| .ycbar{position:fixed;left:0;right:0;bottom:0;z-index:50;display:none;gap:14px; | |
| align-items:center;justify-content:center;flex-wrap:wrap;background:var(--call-bg); | |
| border-top:1px solid var(--call-line);padding:12px 18px;box-shadow:0 -4px 16px rgba(40,30,20,.10)} | |
| .ycbar.show{display:flex} | |
| .ycbar .txt{font-size:14px;color:#7a4a12} | |
| body.has-ycbar{padding-bottom:68px} | |
| /* preview table */ | |
| .tablebox{background:var(--card);border:1px solid var(--line);border-radius:var(--r); | |
| box-shadow:var(--shadow);overflow:auto;max-height:380px} | |
| table{border-collapse:collapse;width:100%;font-size:12.5px} | |
| th{position:sticky;top:0;background:#f4efe7;text-align:left;padding:8px 10px;font-weight:650; | |
| border-bottom:1px solid var(--line);white-space:nowrap} | |
| td{padding:7px 10px;border-bottom:1px solid #f3eee6;font-family:"SF Mono",ui-monospace,Menlo,monospace; | |
| font-size:12px;white-space:nowrap;max-width:260px;overflow:hidden;text-overflow:ellipsis} | |
| td.chg{background:var(--done-bg)} | |
| td.chg .old{color:#9a8d7c;text-decoration:line-through;margin-right:6px} | |
| .capnote{color:var(--ink-soft);font-size:12.5px;margin:7px 2px} | |
| /* audit */ | |
| .audit{display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:11px} | |
| .audit .stat{background:var(--card);border:1px solid var(--line);border-radius:13px; | |
| padding:13px 15px;box-shadow:var(--shadow)} | |
| .audit .num{font-size:21px;font-weight:740;letter-spacing:-.4px} | |
| .audit .lbl{font-size:12px;color:var(--ink-soft)} | |
| /* download */ | |
| .download{background:linear-gradient(180deg,#fffdfa,#f7f2ea);border:1px solid var(--line); | |
| border-radius:18px;padding:24px;text-align:center;box-shadow:var(--shadow)} | |
| .download h3{margin:0 0 4px;font-size:18px;font-weight:720} | |
| .download p{margin:0 0 16px;color:var(--ink-soft);font-size:14px} | |
| .dl-row{display:flex;gap:11px;justify-content:center;flex-wrap:wrap} | |
| .revert{margin-top:14px;font-size:13px;color:var(--ink-soft)} | |
| footer{padding:28px 0 46px;text-align:center;color:#9a8d7c;font-size:13px; | |
| border-top:1px solid var(--line);margin-top:34px} | |
| .restart{display:inline-block;margin-top:18px;font-size:14px;color:var(--accent); | |
| font-weight:600;cursor:pointer;text-decoration:underline} | |
| /* ---- the ETA timer: size-aware, fun, well-mannered (never lies/completes early) ---- */ | |
| .etabar{position:fixed;left:0;top:0;height:3px;width:100%;z-index:60;display:none;background:transparent} | |
| .etabar.show{display:block} | |
| .etabar .fill{height:100%;width:0;border-radius:0 3px 3px 0; | |
| background:linear-gradient(90deg,var(--accent),#5aa98c);transition:width .28s cubic-bezier(.4,0,.2,1); | |
| box-shadow:0 0 10px rgba(47,111,94,.55)} | |
| .etapill{position:fixed;top:13px;left:50%;transform:translateX(-50%) translateY(-6px);z-index:61; | |
| display:none;opacity:0;align-items:center;gap:9px;background:var(--card);border:1px solid var(--line); | |
| border-radius:999px;padding:8px 15px 8px 13px;box-shadow:var(--shadow);font-size:13.5px; | |
| color:var(--ink);transition:opacity .2s,transform .2s;max-width:calc(100vw - 28px)} | |
| .etapill.show{display:flex;opacity:1;transform:translateX(-50%) translateY(0)} | |
| .etapill .dot{width:8px;height:8px;border-radius:50%;background:var(--accent);flex:none; | |
| animation:etapulse 1.1s ease-in-out infinite} | |
| .etapill .stage{font-weight:600;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} | |
| .etapill .eta{color:var(--ink-soft);font-variant-numeric:tabular-nums;flex:none} | |
| @keyframes etapulse{0%,100%{opacity:.3;transform:scale(.75)}50%{opacity:1;transform:scale(1.2)}} | |
| /* Reduced motion: switch off every looping or decorative animation, not only the | |
|    spinner. The status pill's dot pulses forever (etapulse) and the run button's | |
|    nudge glows for 1.6s, so both are disabled together with the bar/pill transitions. */ | |
| @media (prefers-reduced-motion: reduce){ | |
|   .etabar .fill{transition:none} | |
|   .etapill{transition:none} | |
|   .etapill .dot{animation:none} | |
|   .spin{animation:none} | |
|   .btn.nudge{animation:none} | |
| } | |
| /* ---- mobile: the desktop 3-col before/after grids cramp on narrow screens ---- */ | |
| @media (max-width: 600px){ | |
| .wrap{padding:0 14px} | |
| h1{font-size:24px} | |
| header{padding:24px 0 4px} | |
| .sub{font-size:15px} | |
| .ribbon{font-size:12.5px;padding:8px 12px} | |
| .drop{padding:30px 16px} | |
| .actions-row{flex-wrap:wrap} | |
| .actions-row .btn{width:100%} | |
| /* stack before -> after vertically (arrow rotates down) */ | |
| .ba{grid-template-columns:1fr;gap:8px} | |
| .ba .arrow{transform:rotate(90deg);justify-self:center;margin:-2px 0} | |
| /* YOUR CALL tie: stack the value above its options */ | |
| .tie{grid-template-columns:1fr;gap:6px} | |
| .tie .src{justify-self:start} | |
| .tie .opt{padding:7px 12px} /* bigger touch targets */ | |
| /* audit stats: 2-up instead of cramped auto-fit */ | |
| .audit{grid-template-columns:1fr 1fr;gap:8px} | |
| .card{padding:14px 14px} | |
| .download{padding:20px 16px} | |
| .dl-row{flex-direction:column} | |
| .dl-row .btn{width:100%} | |
| section{margin:22px 0} | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="ribbon" id="ribbon">π <b>Your data never leaves this machine.</b> The file is read and cleaned locally; your original is untouched β no upload, no API, no cloud.</div> | |
| <div class="etabar" id="etaBar"><div class="fill" id="etaFill"></div></div> | |
| <div class="etapill" id="etaPill"><span class="dot"></span><span class="stage" id="etaStage">Reading your fileβ¦</span><span class="eta" id="etaTime"></span></div> | |
| <div class="wrap"> | |
| <header> | |
| <div class="logo"><span class="mark">β¦</span> ScrubData</div> | |
| <h1 id="headline">Fix the messy text in your spreadsheet.</h1> | |
| <p class="sub" id="subline">Misspelled names, phone numbers and emails in a dozen formats, cities | |
| typed five different ways — I'll correct and standardize them, protect anything sensitive, and | |
| show you exactly what I changed. <b>I never delete your data</b>; every change is reversible, and | |
| the judgment calls are left to you.</p> | |
| </header> | |
| <!-- role="alert": the message is injected by script, so it must be announced to assistive tech --> | |
| <div class="err" id="errBox" role="alert"><span id="errMsg"></span></div> | |
| <div id="uploader"> | |
| <div class="demo"> | |
| <div class="demolab">What "cleaning" looks like — same rows, just fixed</div> | |
| <div class="demogrid"> | |
| <span class="b"><s>nigeia</s></span><span class="ar">β</span><span class="a">Nigeria</span> | |
| <span class="b"><s>Calfornia</s></span><span class="ar">β</span><span class="a">California</span> | |
| <span class="b"><s>Ana@GMAIL.com </s></span><span class="ar">β</span><span class="a">ana@gmail.com</span> | |
| <span class="b"><s>415.555.0192</s></span><span class="ar">β</span><span class="a">(415) 555-0192</span> | |
| </div> | |
| <div class="demofoot">It <b>corrects and standardizes</b> messy text and removes duplicate rows β | |
| it doesn't delete your data. New to this? <span class="samplelink" id="demoSampleBtn" tabindex="0" role="button" aria-label="Watch it run on a sample file">watch it run on a sample file β</span></div> | |
| </div> | |
| <div class="drop" id="dropZone" tabindex="0" role="button" aria-label="Choose a CSV or Excel file to clean" aria-describedby="dropSub"> | |
| <h3 id="dropTitle">Drop your export here</h3> | |
| <p id="dropSub">CSV or Excel. I'll scan it for the usual mess — duplicates, blanks, | |
| mismatched formats, stray spellings — and anything that looks like personal data.</p> | |
| <input type="file" id="fileInput" accept=".csv,.xlsx,.xls,.tsv" hidden> | |
| </div> | |
| <div class="actions-row"> | |
| <button class="btn primary" id="runBtn" disabled aria-label="Clean the selected file"><span id="runLabel">Clean it up</span></button> | |
| <span class="samplelink" id="sampleBtn" tabindex="0" role="button" aria-label="Try it on a messy sales export">try it on a messy sales export</span> | |
| <span class="samplelink" id="sampleBtn2" tabindex="0" role="button" aria-label="Try it on an HR file with sensitive data">or an HR file with sensitive data</span> | |
| </div> | |
| <div class="actions-row" style="margin-top:-18px;gap:18px"> | |
| <label class="modeltoggle" id="modelToggleWrap" style="display:none" aria-label="Clean with the 4B model"> | |
| <input type="checkbox" id="modelToggle" checked aria-label="Clean with the 4B model"> <span>β‘ Clean with the 4B model <span class="hint" id="modelHint">(real fine-tune Β· ~1 min Β· uncheck for instant)</span></span> | |
| </label> | |
| <span class="samplelink" id="recipeLink" tabindex="0" role="button" aria-label="Apply a saved recipe to this file">β» have a saved recipe? apply it to this file</span> | |
| <input type="file" id="recipeInput" accept=".json" hidden> | |
| </div> | |
| <div class="working" id="working"><span class="spin"></span>Cleaning up β this runs locally, so it's quick.</div> | |
| </div> | |
| <div id="results"> | |
| <div class="filebar"> | |
| <div class="fileicon">β¦</div> | |
| <div> | |
| <div class="nm" id="fileName">file.csv</div> | |
| <div class="meta" id="fileMeta"></div> | |
| </div> | |
| <div class="spacer"></div> | |
| <div class="pill-mini" id="fixPill"></div> | |
| </div> | |
| <section> | |
| <div class="eyebrow">The summary, in plain English</div> | |
| <ul class="summary" id="summaryList"></ul> | |
| </section> | |
| <section id="callSection" style="display:none"> | |
| <div class="eyebrow">The judgment calls β I stopped and asked instead of guessing</div> | |
| <div class="callhero" id="callHero"></div> | |
| <div id="callCards"></div> | |
| </section> | |
| <section id="piiSection" style="display:none"> | |
| <div class="eyebrow">Personal data, protected locally</div> | |
| <div id="piiCards"></div> | |
| </section> | |
| <section> | |
| <div class="eyebrow">Handled β already applied (and reversible)</div> | |
| <div id="doneCards"></div> | |
| </section> | |
| <section> | |
| <div class="eyebrow">Before β after preview</div> | |
| <div class="tablebox" id="tableWrap"></div> | |
| <div class="capnote" id="capNote"></div> | |
| </section> | |
| <section> | |
| <div class="eyebrow">The audit trail (no silent edits, ever)</div> | |
| <div class="audit" id="auditGrid"></div> | |
| </section> | |
| <section> | |
| <div class="download"> | |
| <h3>Your clean copy is ready</h3> | |
| <p>Take the cleaned file and the change log. Both are yours to keep.</p> | |
| <div class="dl-row"> | |
| <button class="btn primary" id="downloadBtn">β Download clean file</button> | |
| <button class="btn" id="logBtn">Export change log</button> | |
| <button class="btn" id="recipeBtn">πΎ Save cleaning recipe</button> | |
| </div> | |
| <div class="revert">Your original is untouched — every change above is a named, | |
| reversible operation. <b>Save the recipe</b> and re-apply this exact cleaning to | |
| next month's export in one click.</div> | |
| </div> | |
| <!-- a real button so "Clean another file" is focusable and works with Enter/Space; the inline reset keeps the .restart link look --> | |
| <div style="text-align:center"><button type="button" class="restart" id="restartBtn" style="background:none;border:0;padding:0;font-family:inherit;line-height:inherit">β Clean another file</button></div> | |
| </section> | |
| </div> | |
| </div> | |
| <section class="quests" aria-label="Hackathon goals covered"> | |
| <h4>How this demo covers the challenge</h4> | |
| <div class="qgrid"> | |
| <div class="quest"><span class="pill">ποΈ Tiny Titan</span> | |
| <p>One 4-billion-param model plans every clean β nothing bigger, anywhere.</p></div> | |
| <div class="quest"><span class="pill">π Off the Grid</span> | |
| <p>No third-party AI APIs β a local-runnable GGUF does the work (on-device when self-hosted).</p></div> | |
| <div class="quest"><span class="pill">ποΈ Well-Tuned</span> | |
| <p>Custom QLoRA fine-tune, trained on execution-verified data and published on the Hub.</p></div> | |
| <div class="quest"><span class="pill">π¨ Off-Brand</span> | |
| <p>Hand-built <code>gr.Server</code> interface β zero default Gradio chrome.</p></div> | |
| <div class="quest"><span class="pill">π¦ Llama Champion</span> | |
| <p>Served through llama.cpp as a Q8_0 GGUF.</p></div> | |
| <div class="quest"><span class="pill">π€ Sharing is Caring</span> | |
| <p>Every run's agent traces are published as an open dataset on the Hub.</p></div> | |
| <div class="quest"><span class="pill">π Field Notes</span> | |
| <p>A full build report β the failures documented next to the wins.</p></div> | |
| <div class="quest"><span class="pill">π‘ Backyard AI track</span> | |
| <p>A hands-off cleaner for the non-coder with a messy Monday export.</p></div> | |
| </div> | |
| </section> | |
| <nav class="reslinks" aria-label="Research and resources"> | |
| <h4>Research & resources</h4> | |
| <a href="https://www.loom.com/share/2fa868147527496e8097d82dd546d663" target="_blank" rel="noopener">π¬ Demo video</a> | |
| <a href="https://github.com/ricalanis/scrubdata-hackathon" target="_blank" rel="noopener">π» Code on GitHub</a> | |
| <a href="https://huggingface.co/ricalanis/scrubdata-qwen3-4b" target="_blank" rel="noopener">π§ Fine-tuned model</a> | |
| <a href="https://huggingface.co/datasets/ricalanis/wildclean" target="_blank" rel="noopener">π WildClean dataset</a> | |
| <a href="https://huggingface.co/datasets/build-small-hackathon/scrubdata-traces" target="_blank" rel="noopener">π Agent traces</a> | |
| <a href="https://huggingface.co/spaces/build-small-hackathon/scrubdata/blob/main/docs/FIELD_NOTES.md" target="_blank" rel="noopener">π Field notes</a> | |
| <a href="https://huggingface.co/spaces/build-small-hackathon/scrubdata/blob/main/docs/paper/main.pdf" target="_blank" rel="noopener">π Preprint</a> | |
| </nav> | |
| <footer id="footerNote">Runs locally on a small model. Your data never leaves this machine.</footer> | |
| <div class="ycbar" id="ycApplyBar"> | |
| <span class="txt">You've made <b id="ycApplyCount">0</b> <span id="ycApplyNoun">decisions</span> | |
| β apply them and re-clean.</span> | |
| <button class="btn primary" id="ycApplyBtn">β Clean now</button> | |
| <button class="btn" id="ycClearBtn">Clear</button> | |
| </div> | |
| <script type="module"> | |
| import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client"; | |
| const $ = (id) => document.getElementById(id); | |
| let selectedFile = null, lastResult = null, loadedRecipe = null, serverReady = false; | |
| // gate the run button on server warmth so a first click can't land on a cold | |
| // reference-index build (the "instant" promise). refreshRun() is the single place | |
| // that decides the button's enabled state + label. | |
| function refreshRun(){ | |
| const btn = $("runBtn"); if (!btn) return; | |
| btn.disabled = !(selectedFile && serverReady); | |
| $("runLabel").textContent = !serverReady ? "Warming upβ¦" | |
| : (loadedRecipe ? "Apply saved recipe" : "Clean it up"); | |
| } | |
| (async function pollReady(){ | |
| try { | |
| const client = await Client.connect(window.location.origin); | |
| for (let i = 0; i < 30; i++){ | |
| try { const r = await client.predict("/ready"); if (r && r.data && r.data[0] && r.data[0].ready){ serverReady = true; refreshRun(); return; } } catch {} | |
| await new Promise(res => setTimeout(res, 1500)); | |
| } | |
| } catch {} | |
| serverReady = true; refreshRun(); // give up gating after ~45s; the timer is the backstop | |
| })(); | |
| const SAMPLE_PATH = "/samples/maria_crm_export.csv"; | |
| const RENDER_CAP = 120; | |
| // ---- honest, deployment-aware copy. On a hosted HF Space the file is processed | |
| // on HF's servers; only a self-hosted run is truly on-device. (server injects this) | |
| const RT = window.__SCRUBDATA_RUNTIME__ || { hosted:false, private:true, | |
| planner:"deterministic planner", model_available:false, where:"this machine" }; | |
| (function labelRuntime(){ | |
| // the 4B model is the DEFAULT now (it's the whole point of the hackathon); show the | |
| // toggle only if a model is wired β checked by default, uncheck for the instant path. | |
| if (RT.model_available) $("modelToggleWrap").style.display = ""; | |
| if (RT.model_available && $("modelHint")) | |
| $("modelHint").textContent = RT.hosted | |
| ? "(real fine-tune on an A100 GPU Β· ~1 min Β· uncheck for instant)" | |
| : "(real fine-tune, on-device Β· ~1 min Β· uncheck for instant)"; | |
| if (RT.hosted){ | |
| $("ribbon").innerHTML = "π <b>Hosted demo on Hugging Face.</b> " + | |
| (RT.model_available | |
| ? "Cleans with the real <b>Qwen3-4B fine-tune</b> on an A100 GPU (~1 min; first run after idle ~2 min) β uncheck the box for an instant deterministic pass." | |
| : "Deterministic cleaning.") + | |
| " Your file is processed in the cloud (no third-party API, not stored); <b>clone & run locally</b> to keep it on your machine."; | |
| $("footerNote").textContent = "Hosted demo on Hugging Face. Cleans with the 4B fine-tune on a Modal GPU by default; " + | |
| "clone & run locally for on-device cleaning β then your data never leaves your machine."; | |
| } | |
| // local default copy is already in the HTML (privacy-true) | |
| })(); | |
| // pre-warm the scale-to-zero Modal GPU the moment the page loads, so it's hot by the | |
| // time the user uploads + clicks β hides the ~60s cold start now that model is default. | |
| if (RT.model_available){ | |
| (async ()=>{ try { const c = await Client.connect(window.location.origin); await c.predict("/wake"); } catch(e){} })(); | |
| } | |
| const useModelNow = () => !!(RT.model_available && $("modelToggle") && $("modelToggle").checked); | |
| const OP_LABEL = { | |
| canonicalize_categories: (o)=>`Unified ${o.mapping_size||"several"} inconsistent spelling${(o.mapping_size||2)>1?"s":""}`, | |
| strip_whitespace: ()=>"Trimmed stray spaces", | |
| collapse_internal_whitespace: ()=>"Collapsed doubled spaces", | |
| normalize_disguised_nulls: ()=>'Turned βN/Aβ, β-β, βnullβ into true blanks', | |
| parse_date: ()=>"Standardized dates to YYYY-MM-DD", | |
| parse_currency: ()=>"Parsed currency text into numbers", | |
| parse_number: ()=>"Parsed numeric text into numbers", | |
| parse_percent: ()=>"Parsed percentages into fractions", | |
| standardize_boolean: ()=>"Unified Yes/No values", | |
| standardize_phone: ()=>"Unified phone formats", | |
| normalize_email: ()=>"Tidied email addresses", | |
| standardize_case: ()=>"Fixed inconsistent casing", | |
| flag_pii: (o)=>`Flagged ${String(o.pii_type||"personal data").replace(/_/g," ")}`, | |
| mask_pii: (o)=>`Masked ${String(o.pii_type||"sensitive values").replace(/_/g," ")} (kept just enough to recognize)`, | |
| hash_pii: (o)=>`Hashed ${String(o.pii_type||"sensitive values").replace(/_/g," ")}`, | |
| pseudonymize_pii: (o)=>`Pseudonymized ${String(o.pii_type||"values").replace(/_/g," ")} (joins still work)`, | |
| }; | |
| const TABLE_OP_LABEL = { | |
| drop_empty_rows: "Removed fully-empty rows", | |
| drop_empty_columns: "Dropped columns with no data", | |
| drop_exact_duplicates: "Removed exact duplicate rows", | |
| }; | |
| function showError(m){ $("errMsg").textContent = m; $("errBox").classList.add("show"); } | |
| function clearError(){ $("errBox").classList.remove("show"); } | |
| function pickFile(f){ | |
| selectedFile = f; | |
| $("dropTitle").innerHTML = '<span class="picked"></span>'; | |
| $("dropTitle").firstChild.textContent = f.name; | |
| $("dropSub").textContent = (f.size/1024).toFixed(1) + " KB Β· ready"; | |
| refreshRun(); clearError(); | |
| } | |
| async function pickSample(path, name){ | |
| clearError(); | |
| try { | |
| const r = await fetch(path, { cache: "no-store" }); | |
| if (!r.ok) throw new Error("Could not load sample (HTTP " + r.status + ")"); | |
| const f = new File([await r.blob()], name, { type: "text/csv" }); | |
| pickFile(f); | |
| $("dropSub").textContent = (f.size/1024).toFixed(1) + " KB Β· sample Β· ready"; | |
| } catch (e){ showError(e.message || String(e)); } | |
| } | |
| $("fileInput").addEventListener("change", e => e.target.files[0] && pickFile(e.target.files[0])); | |
| $("dropZone").addEventListener("click", () => $("fileInput").click()); | |
| $("dropZone").addEventListener("keydown", e=>{ | |
| if (e.key === "Enter" || e.key === " "){ | |
| e.preventDefault(); | |
| $("fileInput").click(); | |
| } | |
| }); | |
| $("sampleBtn").addEventListener("click", ()=>pickSample(SAMPLE_PATH, "maria_crm_export.csv")); | |
| $("sampleBtn2").addEventListener("click", ()=>pickSample("/samples/hr_payroll.csv", "hr_payroll.csv")); | |
| document.querySelectorAll(".samplelink[tabindex]").forEach(el=>{ | |
| el.addEventListener("keydown", e=>{ | |
| if (e.key === "Enter" || e.key === " "){ | |
| e.preventDefault(); | |
| el.click(); | |
| } | |
| }); | |
| }); | |
| // hero "watch it run" nudge: load the sample, then point the user at the now-active button | |
| $("demoSampleBtn").addEventListener("click", async ()=>{ | |
| await pickSample(SAMPLE_PATH, "maria_crm_export.csv"); | |
| const rb = $("runBtn"); rb.scrollIntoView({behavior:"smooth", block:"center"}); | |
| rb.classList.add("nudge"); setTimeout(()=>rb.classList.remove("nudge"), 1600); | |
| }); | |
| ["dragenter","dragover"].forEach(ev => $("dropZone").addEventListener(ev, e=>{e.preventDefault();$("dropZone").classList.add("drag");})); | |
| ["dragleave","drop"].forEach(ev => $("dropZone").addEventListener(ev, e=>{e.preventDefault();$("dropZone").classList.remove("drag");})); | |
| $("dropZone").addEventListener("drop", e=>{ e.dataTransfer.files[0] && pickFile(e.dataTransfer.files[0]); }); | |
| $("restartBtn").addEventListener("click", ()=>{ $("results").classList.remove("show"); | |
| $("uploader").style.display=""; $("headline").textContent="Fix the messy text in your spreadsheet."; | |
| window.scrollTo({top:0, behavior:"smooth"}); }); | |
| // ---- save / re-apply cleaning recipe (the "Monday ritual") | |
| $("recipeLink").addEventListener("click", ()=>$("recipeInput").click()); | |
| $("recipeInput").addEventListener("change", async e=>{ | |
| const f = e.target.files[0]; if (!f) return; | |
| try { | |
| loadedRecipe = await f.text(); JSON.parse(loadedRecipe); // validate it's JSON | |
| $("recipeLink").textContent = "β» recipe loaded: " + f.name + " β Clean it up to apply"; | |
| refreshRun(); | |
| } catch { showError("That doesn't look like a saved recipe (.json)."); loadedRecipe = null; } | |
| }); | |
| $("runBtn").addEventListener("click", run); | |
| // ---- the ETA timer: a size-aware, well-mannered progress bar + status pill. It | |
| // eases toward our time estimate, rotates playful stage labels, holds at ~92% if | |
| // the run overruns (never completes before the real result), and snaps to 100% on | |
| // finish. The estimate adapts to file size AND whether the model path is active. | |
| // Shared timer state: | |
| //   _etaRAF   - id of the pending requestAnimationFrame callback (cancelled on start/stop) | |
| //   _etaStart - performance.now() reading taken when startTimer() was called | |
| //   _etaEst   - estimated run length in ms, as returned by _estimateMs() | |
| //   _etaMode  - "model" or "det"; selects the label set in _stage() / _overrunLabel() | |
| //   _etaDone  - set to true by stopTimer(), reset to false by startTimer() | |
| let _etaRAF = 0, _etaStart = 0, _etaEst = 1, _etaMode = "det", _etaDone = false; | |
| function _estimateMs(bytes, useModel){ | |
| const kb = (bytes || 30000) / 1024; | |
| // model path measured on the live A100 (format=json): warm ~45-80s (node variance), | |
| // cold ~122s end-to-end. Estimate ~60s and let the bar hold gracefully past it on a | |
| // cold start (cap below the cold number so it eases, never finishes early). | |
| return useModel ? Math.min(135000, 55000 + kb * 45) | |
| : Math.max(500, 450 + kb * 12); // deterministic path (~0.6s) | |
| } | |
| function _stage(p){ | |
| if (_etaMode === "model"){ | |
| if (p < 0.12) return "Reading your fileβ¦"; | |
| if (p < 0.30) return "Profiling the columnsβ¦"; | |
| if (p < 0.52) return "Warming up the modelβ¦"; | |
| if (p < 0.80) return "Asking the model about the tricky onesβ¦"; | |
| return "Writing the cleaning planβ¦"; | |
| } | |
| if (p < 0.15) return "Reading your fileβ¦"; | |
| if (p < 0.45) return "Profiling the columnsβ¦"; | |
| if (p < 0.75) return "Spotting the messy bitsβ¦"; | |
| return "Writing the cleaning planβ¦"; | |
| } | |
| function _overrunLabel(el){ | |
| // honest copy once we pass the estimate (never a static stall) | |
| if (_etaMode === "model") | |
| return (Math.floor(el / 12000) % 2) ? "The model's weighing the tricky valuesβ¦" : "Almost thereβ¦"; | |
| return "Warming up the serverβ¦ (first run)"; // det overrun β a cold first clean | |
| } | |
| function startTimer(bytes, useModel){ | |
| _etaMode = useModel ? "model" : "det"; | |
| _etaEst = _estimateMs(bytes, useModel); _etaStart = performance.now(); _etaDone = false; | |
| const rb = $("ribbon"); // keep the pill off the ribbon copy | |
| if (rb) $("etaPill").style.top = (rb.offsetHeight + 8) + "px"; | |
| $("etaBar").classList.add("show"); $("etaPill").classList.add("show"); | |
| cancelAnimationFrame(_etaRAF); | |
| const tick = ()=>{ | |
| const el = performance.now() - _etaStart; | |
| let p = 1 - Math.pow(2, -el / (_etaEst * 0.42)); // ease-out toward 1 | |
| if (!_etaDone && p > 0.92) p = 0.92; // hold until the real result | |
| $("etaFill").style.width = (p * 100).toFixed(1) + "%"; | |
| const over = el > _etaEst; | |
| $("etaStage").textContent = over ? _overrunLabel(el) : _stage(p); | |
| const left = Math.ceil((_etaEst - el) / 1000); | |
| $("etaTime").textContent = (!over && left > 0) ? "~" + left + "s" : ""; | |
| _etaRAF = requestAnimationFrame(tick); | |
| }; | |
| tick(); | |
| } | |
| function stopTimer(){ | |
| _etaDone = true; cancelAnimationFrame(_etaRAF); | |
| $("etaFill").style.width = "100%"; | |
| setTimeout(()=>{ $("etaBar").classList.remove("show"); $("etaPill").classList.remove("show"); | |
| $("etaFill").style.width = "0"; }, 320); | |
| } | |
| async function run(){ | |
| if (!selectedFile) return; | |
| clearError(); | |
| const um = useModelNow() && !loadedRecipe; // model only on a fresh clean, not recipe replay | |
| $("runBtn").disabled = true; startTimer(selectedFile.size, um); | |
| try { | |
| const client = await Client.connect(window.location.origin); | |
| const res = loadedRecipe | |
| ? await client.predict("/clean_with_plan", { file_path: handle_file(selectedFile), plan_json: loadedRecipe }) | |
| : await client.predict("/clean_data", { file_path: handle_file(selectedFile), use_model: um }); | |
| const data = res.data[0]; | |
| if (!data || typeof data !== "object") throw new Error("Unexpected response from the cleaner."); | |
| lastResult = data; | |
| render(data); | |
| } catch (e){ console.error(e); showError(e.message || String(e)); } | |
| finally { refreshRun(); stopTimer(); } | |
| } | |
| // ---- interactive YOUR CALL: STAGE decisions, then apply them all in one re-clean. | |
| // pendingPicks key = column + NUL + raw -> {col, raw, canon} (canon null = keep as-is) | |
| let pendingPicks = {}; | |
| const _pk = (col, raw) => col + "\u0000" + raw; | |
| $("callCards").addEventListener("click", e=>{ | |
| const btn = e.target.closest("[data-yc]"); | |
| if (!btn) return; | |
| const item = (window._ycItems || [])[+btn.dataset.yc]; | |
| if (!item) return; | |
| const key = _pk(item.col, item.raw); | |
| if (pendingPicks[key] && pendingPicks[key].canon === item.canon) delete pendingPicks[key]; // toggle off | |
| else pendingPicks[key] = item; // select / switch | |
| paintYcSelections(); | |
| }); | |
| function paintYcSelections(){ | |
| document.querySelectorAll('#callCards [data-yc]').forEach(b=>{ | |
| const it = (window._ycItems || [])[+b.dataset.yc]; if (!it) return; | |
| const p = pendingPicks[_pk(it.col, it.raw)]; | |
| b.classList.toggle('sel', !!p && p.canon === it.canon); | |
| }); | |
| const n = Object.keys(pendingPicks).length; | |
| $("ycApplyBar").classList.toggle("show", n > 0); | |
| document.body.classList.toggle("has-ycbar", n > 0); | |
| $("ycApplyCount").textContent = n; | |
| $("ycApplyNoun").textContent = n === 1 ? "decision" : "decisions"; | |
| } | |
| $("ycClearBtn").addEventListener("click", ()=>{ pendingPicks = {}; paintYcSelections(); }); | |
| $("ycApplyBtn").addEventListener("click", applyYourCalls); | |
| async function applyYourCalls(){ | |
| const picks = Object.values(pendingPicks); | |
| if (!picks.length || !lastResult || !lastResult.plan_raw || !selectedFile) return; | |
| const plan = JSON.parse(JSON.stringify(lastResult.plan_raw)); | |
| picks.forEach(({col, raw, canon})=>{ | |
| (plan.flags || []).forEach(fl=>{ | |
| if (fl.column === col && Array.isArray(fl.values)){ | |
| fl.values = fl.values.filter(x => x !== raw); | |
| if (fl.candidates) delete fl.candidates[raw]; | |
| } | |
| }); | |
| if (canon){ | |
| plan.columns = plan.columns || []; | |
| let c = plan.columns.find(x => x.name === col); | |
| if (!c){ c = {name: col, detected_semantic_type: "categorical", operations: []}; plan.columns.push(c); } | |
| c.operations = c.operations || []; | |
| let op = c.operations.find(o => o.op === "canonicalize_categories"); | |
| if (!op){ op = {op: "canonicalize_categories", mapping: {}, rationale: "Your decisions (resolved from review)."}; c.operations.push(op); } | |
| op.mapping = op.mapping || {}; op.mapping[raw] = canon; | |
| } | |
| }); | |
| plan.flags = (plan.flags || []).filter(fl => | |
| !(Array.isArray(fl.values) && fl.values.length === 0 && | |
| (fl.issue === "uncertain_canonicalization" || fl.issue === "suspect_values"))); | |
| const apply = $("ycApplyBtn"); apply.disabled = true; apply.textContent = "cleaningβ¦"; clearError(); | |
| startTimer(selectedFile.size, false); // replay applies a saved plan β no model call | |
| try { | |
| const client = await Client.connect(window.location.origin); | |
| const res = await client.predict("/clean_with_plan", | |
| { file_path: handle_file(selectedFile), plan_json: JSON.stringify(plan) }); | |
| const data = res.data[0]; | |
| if (!data || typeof data !== "object") throw new Error("Couldn't apply your decisions."); | |
| lastResult = data; render(data); // render() resets pendingPicks + repaints | |
| const sec = $("callSection"); window.scrollTo({top: sec && sec.style.display!=="none" ? Math.max(0, sec.offsetTop-16) : 0, behavior:"smooth"}); | |
| } catch (e){ console.error(e); showError(e.message || String(e)); } | |
| finally { apply.disabled = false; apply.textContent = "β Clean now"; stopTimer(); } | |
| } | |
| function esc(s){ const d=document.createElement("div"); d.textContent = s==null?"":String(s); return d.innerHTML; } | |
| function columnExamples(data, col, max=2){ | |
| // index-aligned examples when row count is unchanged (mask/format ops never drop rows) | |
| if (data.total_rows_before !== data.total_rows_after) return []; | |
| const out = []; | |
| for (let i=0; i<Math.min(data.before.length, data.after.length); i++){ | |
| const b = data.before[i][col], a = data.after[i][col]; | |
| if (String(b) !== String(a)) { out.push([b, a]); if (out.length >= max) break; } | |
| } | |
| return out; | |
| } | |
| function render(data){ | |
| $("uploader").style.display = "none"; | |
| $("headline").textContent = "Done. Here's what changed."; | |
| $("results").classList.add("show"); | |
| $("fileName").textContent = selectedFile ? selectedFile.name : "your file"; | |
| const secs = data.elapsed_ms != null ? (data.elapsed_ms/1000) : null; | |
| const timeStr = secs == null ? "" : (secs < 1 ? ` Β· in ${Math.round(data.elapsed_ms)}ms` : ` Β· in ${secs.toFixed(1)}s`); | |
| const whereStr = RT.hosted ? "cleaned on HF's servers" : "cleaned on-device"; | |
| const gen = (data.plan_raw && data.plan_raw._generated_by) || ""; | |
| const plannerLabel = /model:/i.test(gen) ? "Qwen3-4B fine-tune" : "deterministic planner"; | |
| $("fileMeta").textContent = `${data.total_rows_before.toLocaleString()} rows Γ ${data.columns_before.length} columns Β· ${whereStr} Β· ${plannerLabel}${timeStr}`; | |
| const nChanges = (data.change_log||[]).length; | |
| $("fixPill").textContent = `${nChanges} change${nChanges===1?"":"s"} applied`; | |
| // ---- plain-English summary | |
| const sl = $("summaryList"); sl.innerHTML = ""; | |
| const items = []; | |
| const cols = data.plan_columns || []; | |
| const flags = data.flags || []; | |
| const canonCols = cols.filter(c => c.operations.some(o=>o.op==="canonicalize_categories")); | |
| const totalMapped = canonCols.reduce((n,c)=>n + c.operations.filter(o=>o.op==="canonicalize_categories") | |
| .reduce((m,o)=>m+(o.mapping_size||0),0), 0); | |
| if (canonCols.length) items.push(["ποΈ", `<b>Unified ${totalMapped} inconsistent spelling${totalMapped===1?"":"s"}</b> across ${canonCols.length} column${canonCols.length===1?"":"s"} (${esc(canonCols.map(c=>c.name).join(", "))}).`]); | |
| const fmtOps = ["parse_date","parse_currency","parse_percent","standardize_phone","standardize_boolean","normalize_email","standardize_case"]; | |
| const fmtCols = cols.filter(c => c.operations.some(o=>fmtOps.includes(o.op))); | |
| if (fmtCols.length) items.push(["π ", `<b>Standardized formats</b> (dates, numbers, phones, emails) in ${fmtCols.length} column${fmtCols.length===1?"":"s"}.`]); | |
| const nullCols = cols.filter(c => c.operations.some(o=>o.op==="normalize_disguised_nulls")); | |
| if (nullCols.length) items.push(["β¬", `<b>Treated disguised blanks</b> (βN/Aβ, βββ, βnullβ) as truly empty in ${nullCols.length} column${nullCols.length===1?"":"s"}, so counts and filters behave.`]); | |
| const maskCols = cols.filter(c => c.operations.some(o=>["mask_pii","hash_pii","pseudonymize_pii"].includes(o.op))); | |
| const flagPii = cols.filter(c => c.operations.some(o=>o.op==="flag_pii") && !maskCols.includes(c)); | |
| if (maskCols.length) items.push(["π‘οΈ", `<b>Protected ${maskCols.length} sensitive column${maskCols.length===1?"":"s"}</b> (${esc(maskCols.map(c=>c.name).join(", "))}) β masked locally, nothing left this machine.`]); | |
| if (flagPii.length) items.push(["π", `<b>Spotted personal data</b> in ${esc(flagPii.map(c=>c.name).join(", "))} β flagged, not changed.`]); | |
| const tableOpsSeen = new Set(); | |
| (data.change_log||[]).forEach(e=>{ if (e && e.op && TABLE_OP_LABEL[e.op]) tableOpsSeen.add(e.op); }); | |
| tableOpsSeen.forEach(op => items.push(["π§Ή", `<b>${TABLE_OP_LABEL[op]}</b>.`])); | |
| if (flags.length) items.push(["β", `<span style="color:var(--call)"><b>${flags.length} thing${flags.length===1?"":"s"} need${flags.length===1?"s":""} your judgment</b> β left untouched below.</span>`]); | |
| if (!items.length) items.push(["β¨", "This file was already in good shape β nothing needed changing."]); | |
| items.forEach(([ic, html])=>{ const li=document.createElement("li"); | |
| li.innerHTML=`<span class="ic">${ic}</span><div>${html}</div>`; sl.appendChild(li); }); | |
| // ---- PII cards | |
| const piiCards = $("piiCards"); piiCards.innerHTML = ""; | |
| const piiCols = cols.filter(c => c.operations.some(o => (o.op||"").includes("pii"))); | |
| const piiAlerts = data.pii_alerts || []; | |
| $("piiSection").style.display = (piiCols.length || piiAlerts.length) ? "" : "none"; | |
| // embedded-PII alerts: cards/SSNs buried in free-text columns (review, not auto-masked) | |
| piiAlerts.forEach(a=>{ | |
| const ptype = String(a.pii_type||"sensitive data").replace(/_/g," "); | |
| const card = document.createElement("div"); | |
| card.className = "card pii"; | |
| card.innerHTML = ` | |
| <div class="card-top"><span class="card-title">${esc(a.column)}</span> | |
| <span class="badge-row"><span class="pii-badge">${esc(ptype)}</span></span> | |
| <span class="pill pii">REVIEW</span></div> | |
| <div class="card-body">Found <b>${a.count}</b> ${esc(ptype)}-shaped value${a.count===1?"":"s"} | |
| buried inside this free-text column (e.g. <code>${esc(a.example||"")}</code>) β they slipped | |
| past column-level detection. Left as-is so your notes aren't mangled; <b>review before sharing</b>.</div>`; | |
| piiCards.appendChild(card); | |
| }); | |
| piiCols.forEach(c=>{ | |
| const ops = c.operations.filter(o=>(o.op||"").includes("pii")); | |
| const masked = ops.find(o=>["mask_pii","hash_pii","pseudonymize_pii"].includes(o.op)); | |
| const ptype = (ops[0].pii_type||"personal data").replace(/_/g," "); | |
| const ex = masked ? columnExamples(data, c.name) : []; | |
| const card = document.createElement("div"); | |
| card.className = "card pii"; | |
| card.innerHTML = ` | |
| <div class="card-top"><span class="card-title">${esc(c.name)}</span> | |
| <span class="badge-row"><span class="pii-badge">${esc(ptype)}</span></span> | |
| <span class="pill pii">${masked ? "PROTECTED" : "FLAGGED"}</span></div> | |
| <div class="card-body">${masked | |
| ? esc(masked.rationale || "Masked locally; the original file is untouched.") | |
| : "Contains " + esc(ptype) + " β flagged for your awareness, values left exactly as they were."}</div> | |
| ${ex.length ? `<div class="ba"> | |
| <div class="col"><div class="lab">Before</div>${ex.map(([b])=>`<div class="val was">${esc(b)}</div>`).join("")}</div> | |
| <div class="arrow">β</div> | |
| <div class="col"><div class="lab">After</div>${ex.map(([,a])=>`<div class="val">${esc(a)}</div>`).join("")}</div> | |
| </div>` : ""}`; | |
| piiCards.appendChild(card); | |
| }); | |
| // ---- DONE cards (non-PII applied ops) | |
| const doneCards = $("doneCards"); doneCards.innerHTML = ""; | |
| cols.forEach(c=>{ | |
| const ops = c.operations.filter(o=>!(o.op||"").includes("pii")); | |
| if (!ops.length) return; | |
| const canon = ops.find(o=>o.op==="canonicalize_categories"); | |
| const sample = canon && canon.mapping_sample ? Object.entries(canon.mapping_sample).slice(0,3) : []; | |
| const card = document.createElement("div"); | |
| card.className = "card done"; | |
| card.innerHTML = ` | |
| <div class="card-top"><span class="card-title">${esc(c.name)}</span><span class="pill done">DONE</span></div> | |
| <div class="card-body">${ops.map(o=>esc((OP_LABEL[o.op]||(()=>o.op))(o))).join(" Β· ")}</div> | |
| ${sample.length ? `<div class="ba"> | |
| <div class="col"><div class="lab">Before</div>${sample.map(([b])=>`<div class="val was">${esc(b)}</div>`).join("")}</div> | |
| <div class="arrow">β</div> | |
| <div class="col"><div class="lab">After</div>${sample.map(([,a])=>`<div class="val">${esc(a)}</div>`).join("")}</div> | |
| </div>` : ""}`; | |
| doneCards.appendChild(card); | |
| }); | |
| if (!doneCards.children.length) | |
| doneCards.innerHTML = `<div class="card done"><div class="card-body">${ | |
| tableOpsSeen.size | |
| ? "No column fixes were needed beyond the table-level tidy-up." | |
| : "No automatic fixes were needed here." | |
| }</div></div>`; | |
| // ---- YOUR CALL (review flags) β the abstention hero | |
| const callCards = $("callCards"); callCards.innerHTML = ""; | |
| const callHero = $("callHero"); callHero.innerHTML = ""; | |
| $("callSection").style.display = flags.length ? "" : "none"; | |
| // merge flags by column, and per value keep the RICHEST candidate list β the | |
| // backend can emit two flags for one column (one carrying the candidate ties, | |
| // one bare); without this merge the bare "no close match" card buries the real | |
| // tie (e.g. Slovia β Slovakia 86% vs Slovenia 86%). | |
| const byCol = new Map(); | |
| flags.forEach(f=>{ | |
| const col = f.column || ""; | |
| if (!byCol.has(col)) byCol.set(col, {col, rationale:"", vals:new Map()}); | |
| const e = byCol.get(col); | |
| const cands = f.candidates || {}; | |
| const hasCands = Object.keys(cands).length > 0; | |
| if ((f.rationale||f.issue) && (!e.rationale || hasCands)) e.rationale = f.rationale || f.issue; | |
| (f.values||[]).forEach(v=>{ | |
| const cs = cands[v] || []; | |
| const prev = e.vals.get(v); | |
| if (prev === undefined || cs.length > prev.length) e.vals.set(v, cs); | |
| }); | |
| }); | |
| const merged = [...byCol.values()]; | |
| if (merged.length){ | |
| let tieCount = 0, total = 0; | |
| merged.forEach(m => m.vals.forEach(cs => { total++; if ((cs||[]).length>=2) tieCount++; })); | |
| callHero.innerHTML = `<b>I left ${total} value${total===1?"":"s"} alone on purpose.</b> ` + | |
| (tieCount ? `${tieCount} ${tieCount===1?"is a genuine toss-up":"are genuine toss-ups"} β close enough that guessing would be a coin flip. ` : "") + | |
| `A wrong "fix" is worse than no fix β so <b>you decide: tap a match to apply it</b>, or keep the value as-is.`; | |
| } | |
| pendingPicks = {}; // fresh analysis -> no staged decisions carried over | |
| const ycItems = []; // resolution targets, indexed by data-yc (injection-safe) | |
| merged.forEach(m=>{ | |
| const card = document.createElement("div"); | |
| card.className = "card call"; | |
| const col = m.col; | |
| const vals = [...m.vals.entries()].slice(0,12); | |
| // render each uncertain value with clickable candidate buttons + keep-as-is | |
| const rowsHtml = vals.map(([v, csAll])=>{ | |
| const cs = (csAll||[]).slice(0,2); | |
| let opts = cs.map((c,i)=>{ | |
| const idx = ycItems.push({col, raw:v, canon:c.canon}) - 1; | |
| return `${i?'<span class="vs">or</span>':''}<button class="opt" data-yc="${idx}" aria-label="Use ${esc(c.canon)} for ${esc(v)}">${esc(c.canon)}<span class="sc">${(c.score*100).toFixed(0)}%</span></button>`; | |
| }).join(""); | |
| if (!cs.length) opts = `<span class="vs">no close match</span>`; | |
| const keepIdx = ycItems.push({col, raw:v, canon:null}) - 1; | |
| opts += `<button class="keep" data-yc="${keepIdx}" aria-label="Keep ${esc(v)} as is">keep β${esc(v)}β</button>`; | |
| return `<div class="src">${esc(v)}</div><div class="opts">${opts}</div>`; | |
| }).join(""); | |
| card.innerHTML = ` | |
| <div class="card-top"><span class="card-title">${esc(col)}</span><span class="pill call">YOUR CALL</span></div> | |
| <div class="card-body">${esc(m.rationale||"Left for review.")}</div> | |
| ${rowsHtml ? `<div class="tie">${rowsHtml}</div>` : ""}`; | |
| callCards.appendChild(card); | |
| }); | |
| window._ycItems = ycItems; | |
| paintYcSelections(); // hide the action bar on a fresh render | |
| // ---- preview table (after, changed cells highlighted, old value struck through) | |
| const wrap = $("tableWrap"); | |
| const colsAfter = data.columns_after || []; | |
| const n = Math.min(data.after.length, RENDER_CAP); | |
| const aligned = data.total_rows_before === data.total_rows_after; | |
| let html = "<table><thead><tr>" + colsAfter.map(c=>`<th>${esc(c)}</th>`).join("") + "</tr></thead><tbody>"; | |
| for (let i=0; i<n; i++){ | |
| html += "<tr>" + colsAfter.map(c=>{ | |
| const a = data.after[i] ? data.after[i][c] : ""; | |
| const b = aligned && data.before[i] ? data.before[i][c] : a; | |
| if (String(a) !== String(b)) | |
| return `<td class="chg"><span class="old">${esc(b)}</span>${esc(a)}</td>`; | |
| return `<td>${esc(a)}</td>`; | |
| }).join("") + "</tr>"; | |
| } | |
| wrap.innerHTML = html + "</tbody></table>"; | |
| $("capNote").textContent = data.total_rows_after > n | |
| ? `Showing the first ${n} of ${data.total_rows_after.toLocaleString()} rows β the download has everything.` : ""; | |
| // ---- audit grid (monitorability) | |
| const m = data.monitor || {}; | |
| const audit = $("auditGrid"); audit.innerHTML = ""; | |
| [["columns_touched","columns touched"],["canonicalizations","values unified"], | |
| ["grounded_columns","reference-grounded"],["abstentions","abstained β review"], | |
| ["pii_columns_protected","PII columns protected"], | |
| ["changes_applied","changes logged"],["silent_edits","silent edits"]] | |
| .forEach(([k,lbl])=>{ | |
| const v = (m[k] === null || m[k] === undefined) ? "β" : m[k]; | |
| const d = document.createElement("div"); | |
| d.className = "stat"; | |
| d.innerHTML = `<div class="num">${esc(v)}</div><div class="lbl">${lbl}</div>`; | |
| audit.appendChild(d); | |
| }); | |
| } | |
| $("downloadBtn").addEventListener("click", ()=>{ | |
| if (!lastResult || !lastResult.csv_text) return; | |
| const url = URL.createObjectURL(new Blob([lastResult.csv_text], {type:"text/csv;charset=utf-8"})); | |
| const a = document.createElement("a"); a.href=url; a.download="scrubbed.csv"; | |
| document.body.appendChild(a); a.click(); a.remove(); | |
| setTimeout(()=>URL.revokeObjectURL(url), 1000); | |
| }); | |
| $("logBtn").addEventListener("click", ()=>{ | |
| if (!lastResult) return; | |
| const payload = JSON.stringify({change_log:lastResult.change_log, plan_columns:lastResult.plan_columns, | |
| flags:lastResult.flags, monitor:lastResult.monitor}, null, 2); | |
| const url = URL.createObjectURL(new Blob([payload], {type:"application/json"})); | |
| const a = document.createElement("a"); a.href=url; a.download="scrubdata-changelog.json"; | |
| document.body.appendChild(a); a.click(); a.remove(); | |
| setTimeout(()=>URL.revokeObjectURL(url), 1000); | |
| }); | |
| $("recipeBtn").addEventListener("click", ()=>{ | |
| const recipe = lastResult && lastResult.plan_raw; | |
| if (!recipe) return; | |
| const url = URL.createObjectURL(new Blob([JSON.stringify(recipe, null, 2)], {type:"application/json"})); | |
| const a = document.createElement("a"); a.href=url; a.download="scrubdata-recipe.json"; | |
| document.body.appendChild(a); a.click(); a.remove(); | |
| setTimeout(()=>URL.revokeObjectURL(url), 1000); | |
| }); | |
| </script> | |
| </body> | |
| </html> | |