Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8" /> | |
| <title>DesignGym 2.0 β Live Demo</title> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <link rel="preconnect" href="https://fonts.googleapis.com" /> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin /> | |
| <link href="https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,400;9..144,700&family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" /> | |
| <style> | |
| :root{ | |
| --bg:#0f1117;--s1:#181b25;--s2:#1f2330;--s3:#262a38; | |
| --bd:rgba(255,255,255,.08);--bd2:rgba(255,255,255,.14); | |
| --tx:#e8ecf4;--mt:#8892a6;--ac:#5b9cf5;--gn:#3dd68c; | |
| --am:#f0b429;--rd:#ef6461; | |
| --ff:'Inter',system-ui,sans-serif; | |
| --fm:'JetBrains Mono',ui-monospace,monospace; | |
| --fd:'Fraunces',Georgia,serif; | |
| } | |
| *,*::before,*::after{box-sizing:border-box;margin:0} | |
| body{font-family:var(--ff);background:var(--bg);color:var(--tx);line-height:1.5;min-height:100vh} | |
| a{color:var(--ac);text-decoration:none}a:hover{text-decoration:underline} | |
| /* ββ NAV TABS ββ */ | |
| .tab-bar{display:flex;gap:6px;padding:10px 20px;background:var(--s1);border-bottom:1px solid var(--bd);position:sticky;top:0;z-index:90;align-items:center} | |
| .tab-bar-title{font-family:var(--fd);font-size:16px;font-weight:700;margin-right:auto;white-space:nowrap} | |
| .tab-btn{display:inline-flex;align-items:center;gap:6px;padding:8px 18px;font-family:var(--ff);font-size:13px;font-weight:600;color:var(--mt);background:var(--s2);border:1px solid var(--bd2);border-radius:999px;cursor:pointer;transition:all .15s;white-space:nowrap} | |
| .tab-btn:hover{color:var(--tx);background:var(--s3);border-color:var(--ac)} | |
| .tab-btn.active{color:#fff;background:var(--ac);border-color:var(--ac)} | |
| .tab-ico{font-size:15px;line-height:1} | |
| .blog-link{display:inline-flex;align-items:center;gap:6px;padding:8px 18px;font-family:var(--ff);font-size:13px;font-weight:600;color:var(--mt);background:var(--s2);border:1px solid var(--bd2);border-radius:999px;cursor:pointer;transition:all .15s;white-space:nowrap;text-decoration:none} | |
| .blog-link:hover{color:#fff;background:#d4629a;border-color:#d4629a;text-decoration:none} | |
| .tab-page{display:none;padding:0}.tab-page.active{display:block} | |
| /* ββ HEADER ββ */ | |
| .hdr{padding:20px 24px;border-bottom:1px solid var(--bd)} | |
| .hdr h1{font-family:var(--fd);font-size:22px;font-weight:700} | |
| .hdr-sub{color:var(--mt);font-size:12px;margin-top:4px} | |
| /* ββ BACKEND CARD ββ */ | |
| .bc{margin:16px 24px;background:var(--s1);border:1px solid var(--bd2);border-radius:12px;padding:14px 18px;display:flex;align-items:center;gap:14px;flex-wrap:wrap} | |
| .dot{width:10px;height:10px;border-radius:50%;flex-shrink:0} | |
| .dot-g{background:var(--gn);box-shadow:0 0 6px var(--gn)}.dot-a{background:var(--am);box-shadow:0 0 6px var(--am)}.dot-r{background:var(--rd);box-shadow:0 0 6px var(--rd)}.dot-m{background:var(--mt)} | |
| .bc-info{flex:1;min-width:200px} | |
| .bc-lbl{font-family:var(--fm);font-size:13px;font-weight:500} | |
| .bc-det{color:var(--mt);font-size:11px;margin-top:2px} | |
| .sw{display:flex;align-items:center;gap:6px} | |
| .sw label{font-size:10px;color:var(--mt);text-transform:uppercase;letter-spacing:.5px} | |
| .sw select{font-family:var(--fm);font-size:12px;padding:5px 8px;border-radius:6px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx)} | |
| /* ββ PANELS ββ */ | |
| .grid2{display:grid;grid-template-columns:320px 1fr;gap:16px;padding:16px 24px} | |
| @media(max-width:860px){.grid2{grid-template-columns:1fr}} | |
| .pnl{background:var(--s1);border:1px solid var(--bd);border-radius:12px;padding:16px} | |
| .pnl h2{font-size:10px;text-transform:uppercase;letter-spacing:.6px;color:var(--mt);margin:14px 0 6px}.pnl h2:first-child{margin-top:0} | |
| /* controls */ | |
| .sel{width:100%;font-family:var(--fm);font-size:12px;padding:8px 10px;border-radius:8px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx);-webkit-appearance:none;appearance:none} | |
| .pol-pick{display:flex;flex-direction:column;gap:6px} | |
| .pol-c{display:flex;gap:8px;align-items:flex-start;padding:8px 10px;border:1px solid var(--bd2);border-radius:8px;background:var(--s2);cursor:pointer;transition:border-color .12s} | |
| .pol-c:hover{border-color:var(--ac)}.pol-c input{margin-top:3px} | |
| .pol-c div{display:flex;flex-direction:column}.pol-c strong{font-size:12px}.pol-c .desc{color:var(--mt);font-size:10px;line-height:1.4} | |
| .help-tip{display:block;color:var(--mt);font-size:10px;margin-top:3px;font-style:italic} | |
| .acts{display:flex;gap:6px;margin-top:4px} | |
| button{font-family:var(--ff);font-size:12px;padding:9px 12px;border-radius:8px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx);cursor:pointer;transition:border-color .12s,background .12s} | |
| button:hover{border-color:var(--ac)}button:disabled{opacity:.35;cursor:not-allowed} | |
| button.pri{background:var(--ac);border-color:transparent;color:#0f1117;font-weight:600;flex:1;min-width:120px} | |
| button.pri:hover{background:#7ab4f7} | |
| button.sec{background:transparent} | |
| .sts{margin-top:10px;font-size:11px;font-family:var(--fm);padding:7px 9px;border-radius:6px;background:rgba(255,255,255,.03);border:1px dashed var(--bd2);min-height:32px} | |
| .sts.run{border-style:solid;border-color:var(--ac);color:var(--ac)} | |
| .sts.ok{border-style:solid;border-color:var(--gn);color:var(--gn)} | |
| .sts.err{border-style:solid;border-color:var(--rd);color:var(--rd)} | |
| /* while-you-wait banner */ | |
| .wait-banner{display:none;margin:12px 24px;padding:14px 18px;background:linear-gradient(135deg,rgba(91,156,245,.08),rgba(212,98,154,.08));border:1px solid rgba(91,156,245,.25);border-radius:12px;animation:fadeIn .3s ease} | |
| .wait-banner.show{display:block} | |
| .wait-banner .wb-hd{font-size:13px;font-weight:600;margin-bottom:6px;color:var(--tx)} | |
| .wait-banner .wb-timer{font-family:var(--fm);font-size:22px;font-weight:600;color:var(--ac);margin:6px 0} | |
| .wait-banner .wb-links{display:flex;gap:8px;margin-top:8px;flex-wrap:wrap} | |
| .wait-banner .wb-btn{display:inline-flex;align-items:center;gap:5px;padding:7px 14px;font-size:12px;font-weight:600;border-radius:8px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx);cursor:pointer;transition:all .15s;text-decoration:none} | |
| .wait-banner .wb-btn:hover{border-color:var(--ac);background:var(--s3);text-decoration:none} | |
| .wait-banner .wb-sub{color:var(--mt);font-size:11px;margin-top:4px} | |
| @keyframes fadeIn{from{opacity:0;transform:translateY(-6px)}to{opacity:1;transform:translateY(0)}} | |
| /* quick-results teaser */ | |
| .teaser{margin:0 24px;padding:10px 16px;background:var(--s1);border:1px solid var(--bd);border-radius:8px;font-size:12px;color:var(--mt);display:flex;align-items:center;gap:8px;flex-wrap:wrap} | |
| .teaser strong{color:var(--tx)} | |
| .teaser .t-link{color:var(--ac);cursor:pointer;font-weight:600;text-decoration:underline} | |
| /* mode toggle */ | |
| .mode-toggle{display:flex;flex-direction:column;gap:6px} | |
| .mode-opt{display:flex;gap:8px;align-items:flex-start;padding:8px 10px;border:1px solid var(--bd2);border-radius:8px;background:var(--s2);cursor:pointer;transition:border-color .12s} | |
| .mode-opt:hover{border-color:var(--ac)}.mode-opt input{margin-top:3px} | |
| .mode-opt div{display:flex;flex-direction:column}.mode-opt strong{font-size:12px}.mode-opt .desc{color:var(--mt);font-size:10px;line-height:1.4} | |
| /* source badge shown on cached results */ | |
| .src-badge{display:inline-block;font-size:10px;padding:3px 10px;border-radius:5px;font-weight:500;margin:6px 0} | |
| .src-cached{background:rgba(91,156,245,.12);color:var(--ac)} | |
| .src-live{background:rgba(61,214,140,.12);color:var(--gn)} | |
| /* loading spinner */ | |
| @keyframes spin{to{transform:rotate(360deg)}} | |
| .spinner{display:inline-block;width:14px;height:14px;border:2px solid var(--bd2);border-top-color:var(--ac);border-radius:50%;animation:spin .7s linear infinite;vertical-align:middle;margin-right:6px} | |
| /* metrics */ | |
| .mrow{display:grid;grid-template-columns:repeat(4,1fr);gap:8px;margin-top:10px} | |
| @media(max-width:600px){.mrow{grid-template-columns:repeat(2,1fr)}} | |
| .mc{background:var(--s2);border:1px solid var(--bd);border-radius:8px;padding:10px;display:flex;flex-direction:column;gap:1px} | |
| .mc .ml{font-size:9px;text-transform:uppercase;letter-spacing:.5px;color:var(--mt)} | |
| .mc .mv{font-family:var(--fm);font-size:20px;font-weight:500;font-feature-settings:"tnum"} | |
| .mc.hi{border-color:rgba(91,156,245,.3)}.mc.hi .mv{color:var(--ac)} | |
| /* canvas */ | |
| #canvas{width:100%;max-height:52vh;background:#f8fafc;border-radius:8px;margin-top:10px} | |
| /* trajectory */ | |
| .trj{margin:0 24px 20px;background:var(--s1);border:1px solid var(--bd);border-radius:12px;padding:16px} | |
| .trj summary{cursor:pointer;font-size:13px;font-weight:600;padding:2px 0}.trj summary:hover{color:var(--ac)} | |
| .trj-ct{color:var(--mt);font-weight:400;font-size:11px} | |
| table.t{width:100%;border-collapse:collapse;font-size:11px;font-family:var(--fm);margin-top:8px} | |
| table.t th,table.t td{padding:6px 8px;border-bottom:1px solid var(--bd);text-align:left} | |
| table.t th{font-size:9px;text-transform:uppercase;letter-spacing:.5px;color:var(--mt);font-weight:500} | |
| .rp{color:var(--gn)}.rn{color:var(--rd)}.rz{color:var(--mt)} | |
| .bg{display:inline-block;font-size:9px;padding:2px 6px;border-radius:5px;font-weight:500;letter-spacing:.3px} | |
| .bg-ft{background:rgba(61,214,140,.15);color:var(--gn)}.bg-h{background:rgba(136,146,166,.15);color:var(--mt)} | |
| .bg-rt{background:rgba(240,180,41,.15);color:var(--am)}.bg-b{background:rgba(240,180,41,.15);color:var(--am)} | |
| .bg-fb{background:rgba(239,100,97,.15);color:var(--rd)} | |
| pre.raw{white-space:pre-wrap;word-break:break-word;background:var(--s2);border-radius:6px;padding:8px;font-size:10px;color:#9ca8be;max-height:260px;overflow:auto;margin-top:6px} | |
| /* ββ BENCHMARK TAB ββ */ | |
| .bm{padding:20px 24px;max-width:900px} | |
| .bm h2{font-family:var(--fd);font-size:18px;margin:24px 0 8px;color:var(--tx)}.bm h2:first-child{margin-top:0} | |
| .bm p{color:var(--mt);font-size:13px;line-height:1.6;margin:6px 0} | |
| .bm table{width:100%;border-collapse:collapse;font-size:12px;font-family:var(--fm);margin:10px 0 16px} | |
| .bm th,.bm td{padding:8px 10px;border-bottom:1px solid var(--bd);text-align:left} | |
| .bm th{font-size:10px;text-transform:uppercase;color:var(--mt);font-weight:500} | |
| .bm .win{color:var(--gn);font-weight:600} | |
| .bm .tag{display:inline-block;font-size:10px;padding:2px 8px;border-radius:4px;font-weight:500} | |
| .bm .tag-g{background:rgba(61,214,140,.12);color:var(--gn)} | |
| .bm .tag-a{background:rgba(240,180,41,.12);color:var(--am)} | |
| .bm .tag-r{background:rgba(239,100,97,.12);color:var(--rd)} | |
| .bm blockquote{border-left:3px solid var(--ac);padding:8px 14px;margin:10px 0;background:rgba(91,156,245,.06);border-radius:0 6px 6px 0;font-size:12px;color:var(--mt)} | |
| .bm ul{color:var(--mt);font-size:13px;padding-left:20px;margin:6px 0} | |
| .bm li{margin:4px 0} | |
| .bm code{font-family:var(--fm);font-size:11px;background:var(--s2);padding:1px 5px;border-radius:3px} | |
| .bm a{color:var(--ac)} | |
| /* ββ ABOUT TAB ββ */ | |
| .abt{padding:20px 24px;max-width:900px} | |
| .abt h2{font-family:var(--fd);font-size:18px;margin:24px 0 8px}.abt h2:first-child{margin-top:0} | |
| .abt h3{font-size:14px;margin:16px 0 6px;color:var(--tx)} | |
| .abt p{color:var(--mt);font-size:13px;line-height:1.6;margin:6px 0} | |
| .abt a{color:var(--ac)} | |
| .abt ul{color:var(--mt);font-size:13px;padding-left:20px} | |
| .abt li{margin:4px 0} | |
| .abt code{font-family:var(--fm);font-size:11px;background:var(--s2);padding:1px 5px;border-radius:3px} | |
| .link-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px;margin:12px 0} | |
| .link-card{background:var(--s1);border:1px solid var(--bd2);border-radius:10px;padding:12px 14px;transition:border-color .12s} | |
| .link-card:hover{border-color:var(--ac);text-decoration:none} | |
| .link-card .lc-t{font-size:13px;font-weight:600;color:var(--tx)} | |
| .link-card .lc-d{font-size:11px;color:var(--mt);margin-top:3px} | |
| .abt img{max-width:100%;border-radius:8px;margin:10px 0} | |
| /* ββ FOOTER ββ */ | |
| footer{text-align:center;padding:16px;font-size:10px;color:var(--mt);border-top:1px solid var(--bd)} | |
| footer a{color:var(--mt);margin:0 4px}footer a:hover{color:var(--ac)} | |
| .fenv{margin-top:4px;font-family:var(--fm);font-size:9px;color:rgba(255,255,255,.2)} | |
| </style> | |
| </head> | |
| <body> | |
| <!-- TAB BAR --> | |
| <nav class="tab-bar"> | |
| <span class="tab-bar-title">DesignGym 2.0</span> | |
| <button class="tab-btn active" data-tab="demo"><span class="tab-ico">▶</span> Demo</button> | |
| <button class="tab-btn" data-tab="bench"><span class="tab-ico">☰</span> Benchmark</button> | |
| <button class="tab-btn" data-tab="about"><span class="tab-ico">⚙</span> About & Links</button> | |
| <a class="blog-link" href="https://huggingface.co/spaces/yashvyasop/DesignGym/blob/main/Blog.md" target="_blank"><span class="tab-ico">📝</span> Blog</a> | |
| </nav> | |
| <!-- ==================== DEMO TAB ==================== --> | |
| <div class="tab-page active" id="page-demo"> | |
| <header class="hdr"> | |
| <p class="hdr-sub">Watch the agent design β compare heuristic vs fine-tuned LoRA models in real time</p> | |
| </header> | |
| <div class="teaser" id="teaser"> | |
| <span>⚡</span> | |
| <span><strong>Tip:</strong> Episodes take ~30-60s on CPU. Pre-computed results are ready in the <span class="t-link" id="go-bench">Benchmark</span> tab, or read the <span class="t-link" id="go-blog">Blog</span> while you wait.</span> | |
| </div> | |
| <div class="wait-banner" id="wait-banner"> | |
| <div class="wb-hd">⏳ Episode running on CPU...</div> | |
| <div class="wb-timer" id="wb-timer">0s</div> | |
| <div class="wb-sub">The model is running inference on CPU β each step takes a few seconds. Totally normal.</div> | |
| <div class="wb-links"> | |
| <span class="wb-btn" id="wb-bench">☰ View Benchmark Results</span> | |
| <a class="wb-btn" href="https://huggingface.co/spaces/yashvyasop/DesignGym/blob/main/Blog.md" target="_blank">📝 Read the Blog</a> | |
| <span class="wb-btn" id="wb-about">⚙ Project Links & Notebooks</span> | |
| </div> | |
| <div class="wb-sub" style="margin-top:8px">Results will appear here when done β you can navigate away and come back.</div> | |
| </div> | |
| <div class="bc" id="bc"> | |
| <div class="dot dot-m" id="bd"></div> | |
| <div class="bc-info"> | |
| <div class="bc-lbl" id="bl">Checking backend...</div> | |
| <div class="bc-det" id="bdet"></div> | |
| </div> | |
| <div class="sw"> | |
| <label>Adapter</label> | |
| <select id="asw"> | |
| <option value="sft">SFT</option> | |
| <option value="grpo">GRPO</option> | |
| <option value="base">Base (no LoRA)</option> | |
| </select> | |
| </div> | |
| </div> | |
| <div class="grid2"> | |
| <div class="pnl" id="ctrl"> | |
| <h2>Task</h2> | |
| <select id="task" class="sel"> | |
| <option value="poster_basic_v1">Poster β easy</option> | |
| <option value="editorial_cover_v1">Editorial Cover β medium</option> | |
| <option value="dense_flyer_v1">Dense Flyer β hard</option> | |
| </select> | |
| <span class="help-tip">Each task tests different layout skills: hierarchy, spacing, reading order</span> | |
| <h2>Policy</h2> | |
| <div class="pol-pick"> | |
| <label class="pol-c"> | |
| <input type="radio" name="pol" value="heuristic" checked /> | |
| <div> | |
| <strong>Heuristic Planner</strong> | |
| <span class="desc">Hand-coded rules. Instant. The baseline to beat.</span> | |
| </div> | |
| </label> | |
| <label class="pol-c" id="pol-llm"> | |
| <input type="radio" name="pol" value="llm" /> | |
| <div> | |
| <strong id="llm-lbl">LLM Picker</strong> | |
| <span class="desc" id="llm-desc">Uses the active adapter model to choose actions</span> | |
| </div> | |
| </label> | |
| </div> | |
| <span class="help-tip">Heuristic is the teacher that generated SFT training data. LLM is the student.</span> | |
| <h2>Run Mode</h2> | |
| <div class="mode-toggle"> | |
| <label class="mode-opt"> | |
| <input type="radio" name="runmode" value="cached" checked /> | |
| <div> | |
| <strong>Cached Result</strong> | |
| <span class="desc">Instant β shows pre-computed benchmark output (seed=0)</span> | |
| </div> | |
| </label> | |
| <label class="mode-opt"> | |
| <input type="radio" name="runmode" value="live" /> | |
| <div> | |
| <strong>Run Live</strong> | |
| <span class="desc">Execute on server CPU (~1-1.5 min per LLM episode)</span> | |
| </div> | |
| </label> | |
| </div> | |
| <h2>Run</h2> | |
| <div class="acts"> | |
| <button id="run" class="pri">Show Cached Result</button> | |
| <button id="rst" class="sec">Reset</button> | |
| </div> | |
| <span class="help-tip">Cached = instant pre-computed results. Live = real model inference on CPU.</span> | |
| <div id="sts" class="sts">Idle. Pick a task and click Run.</div> | |
| </div> | |
| <div class="pnl"> | |
| <div class="mrow"> | |
| <div class="mc hi"><span class="ml">Final Score</span><span class="mv" id="ms">β</span></div> | |
| <div class="mc"><span class="ml">Instruction</span><span class="mv" id="mi">β</span></div> | |
| <div class="mc"><span class="ml">Steps</span><span class="mv" id="mst">β</span></div> | |
| <div class="mc"><span class="ml">Total Reward</span><span class="mv" id="mr">β</span></div> | |
| </div> | |
| <svg id="canvas" viewBox="0 0 800 1000" preserveAspectRatio="xMidYMid meet"></svg> | |
| </div> | |
| </div> | |
| <div class="trj"> | |
| <details open> | |
| <summary>Trajectory <span class="trj-ct" id="tc">β no steps yet</span></summary> | |
| <table class="t"> | |
| <thead><tr><th>Step</th><th>Action</th><th>Reward</th><th>Score</th><th>Policy</th></tr></thead> | |
| <tbody id="tb"></tbody> | |
| </table> | |
| </details> | |
| <details> | |
| <summary>Raw JSON</summary> | |
| <pre class="raw" id="rj"></pre> | |
| </details> | |
| </div> | |
| </div> | |
| <!-- ==================== BENCHMARK TAB ==================== --> | |
| <div class="tab-page" id="page-bench"> | |
| <div class="bm"> | |
| <h2>Benchmark Results</h2> | |
| <p>36 episodes total: 4 backends × 3 tasks × 3 seeds. Deterministic environment, MPS (M1) inference. Every number is reproducible.</p> | |
| <h2>Overall Performance</h2> | |
| <table> | |
| <thead><tr><th>Backend</th><th>Instruction Score</th><th>Total Reward</th><th>Avg Time</th><th>LLM Steer Rate</th></tr></thead> | |
| <tbody> | |
| <tr><td>Heuristic</td><td>0.5564</td><td>1.588</td><td class="win">0.0s</td><td>β</td></tr> | |
| <tr><td>Base Qwen (no LoRA)</td><td>0.5367</td><td>1.679</td><td>11.5s</td><td>100%</td></tr> | |
| <tr><td>SFT Fine-tuned</td><td>0.5557</td><td>1.789</td><td>16.8s</td><td>100%</td></tr> | |
| <tr><td>GRPO Fine-tuned</td><td>0.5599</td><td class="win">1.854</td><td>12.0s</td><td>100%</td></tr> | |
| </tbody> | |
| </table> | |
| <h2>Per-Task Breakdown</h2> | |
| <table> | |
| <thead><tr><th>Task</th><th>Backend</th><th>Instr Score</th><th>Total Reward</th></tr></thead> | |
| <tbody> | |
| <tr><td rowspan="4">Poster (easy)</td><td>Heuristic</td><td>0.5033</td><td>1.319</td></tr> | |
| <tr><td>Base</td><td>0.5087</td><td>1.400</td></tr> | |
| <tr><td>SFT</td><td>0.5238</td><td>1.435</td></tr> | |
| <tr><td>GRPO</td><td>0.5129</td><td>1.455</td></tr> | |
| <tr><td rowspan="4">Editorial (med)</td><td>Heuristic</td><td>0.5424</td><td>1.544</td></tr> | |
| <tr><td>Base</td><td>0.4866</td><td>1.658</td></tr> | |
| <tr><td>SFT</td><td>0.4878</td><td>1.894</td></tr> | |
| <tr><td>GRPO</td><td>0.4795</td><td>1.966</td></tr> | |
| <tr><td rowspan="4">Dense Flyer (hard)</td><td>Heuristic</td><td>0.6235</td><td>1.900</td></tr> | |
| <tr><td>Base</td><td>0.6148</td><td>1.980</td></tr> | |
| <tr><td>SFT</td><td class="win">0.6555</td><td>2.038</td></tr> | |
| <tr><td>GRPO</td><td class="win">0.6872</td><td class="win">2.139</td></tr> | |
| </tbody> | |
| </table> | |
| <h2>Honest Assessment</h2> | |
| <blockquote>These results are real. No cherry-picking, no hidden runs. The environment is deterministic β re-run with the same seeds and you get the same numbers.</blockquote> | |
| <p><span class="tag tag-g">What works</span></p> | |
| <ul> | |
| <li><strong>SFT eliminated 0% → 100% valid JSON</strong> β the biggest win. Base Qwen cannot speak the action format at all. After SFT it can.</li> | |
| <li><strong>GRPO gets the highest total reward</strong> (1.854 avg) β it picks bolder, higher-payoff actions per step.</li> | |
| <li><strong>On the hardest task (dense_flyer)</strong>, both fine-tuned models beat base on instruction score: SFT 0.655 vs base 0.615, GRPO 0.687 vs base 0.615.</li> | |
| <li><strong>100% LLM steer rate</strong> β every step is model-driven, zero fallback to heuristic.</li> | |
| </ul> | |
| <p><span class="tag tag-a">What's honest</span></p> | |
| <ul> | |
| <li><strong>Heuristic still wins on final score</strong> (0.738 vs SFT 0.702). The hand-coded rules are a strong baseline because they were written with full knowledge of the reward function.</li> | |
| <li><strong>SFT and GRPO are ~equal to base on some tasks</strong> β the adapter lift is small (~0.5-2% on instruction score). More GRPO training budget would likely help.</li> | |
| <li>The 0.5B model is at the edge of what can reason about complex layout state β a larger base model (3B+) would likely show bigger adapter-vs-base differences.</li> | |
| </ul> | |
| <p><span class="tag tag-r">What to improve</span></p> | |
| <ul> | |
| <li><strong>More GRPO training</strong> β current run was limited to ~200 steps on free Colab GPU. State-of-the-art needs 1000+ steps with best-of-N sampling.</li> | |
| <li><strong>Reward shaping</strong> β GRPO's higher reward but lower final score suggests the reward function could better align per-step gains with end-of-episode quality.</li> | |
| <li><strong>Larger base model</strong> β Qwen 3B or 7B with LoRA would still fit in 16GB with quantization and would better handle the multi-metric reasoning.</li> | |
| <li><strong>Process reward model</strong> β train a critic that scores partial trajectories, giving GRPO denser signal than episode-end score alone.</li> | |
| </ul> | |
| </div> | |
| </div> | |
| <!-- ==================== ABOUT TAB ==================== --> | |
| <div class="tab-page" id="page-about"> | |
| <div class="abt"> | |
| <h2>What is DesignGym?</h2> | |
| <p>DesignGym 2.0 is an OpenEnv-compatible RL environment where an LLM agent learns to improve graphic layouts through sequential actions β move, resize, align, reflow, promote, finalize β evaluated by computable aesthetic metrics (overlap, alignment, spacing, hierarchy, reading order, instruction fit).</p> | |
| <p>The training pipeline: <strong>Heuristic Planner</strong> generates expert trajectories → <strong>SFT</strong> teaches the model the action interface (0% → 100% valid JSON) → <strong>GRPO</strong> learns which valid actions are better via environment reward.</p> | |
| <h2>Project Links</h2> | |
| <div class="link-grid"> | |
| <a class="link-card" href="https://github.com/canboyedits/DesignGym" target="_blank"> | |
| <div class="lc-t">GitHub Repo</div> | |
| <div class="lc-d">Full source: environment, training, inference, server</div> | |
| </a> | |
| <a class="link-card" href="https://huggingface.co/spaces/yashvyasop/DesignGym" target="_blank"> | |
| <div class="lc-t">HF Space (Live)</div> | |
| <div class="lc-d">This deployed demo on Hugging Face</div> | |
| </a> | |
| <a class="link-card" href="https://huggingface.co/yashvyasop/designgym2-sft-qwen05-lora" target="_blank"> | |
| <div class="lc-t">SFT LoRA Adapter</div> | |
| <div class="lc-d">Qwen2.5-0.5B + SFT fine-tune on heuristic data</div> | |
| </a> | |
| <a class="link-card" href="https://huggingface.co/yashvyasop/designgym2-grpo-qwen05-lora" target="_blank"> | |
| <div class="lc-t">GRPO LoRA Adapter</div> | |
| <div class="lc-d">Qwen2.5-0.5B + GRPO RL from environment reward</div> | |
| </a> | |
| <a class="link-card" href="https://colab.research.google.com/drive/1ZtjQSen19Sdmx8FOXvM-nb_AFDSNM_1C?usp=sharing" target="_blank"> | |
| <div class="lc-t">SFT Training Notebook</div> | |
| <div class="lc-d">Colab: data generation, training loop, eval</div> | |
| </a> | |
| <a class="link-card" href="https://colab.research.google.com/drive/1jw1waO-bc0Mk3U7-RBbomsIGFBWvA0aW?usp=sharing" target="_blank"> | |
| <div class="lc-t">GRPO Training Notebook</div> | |
| <div class="lc-d">Colab: GRPO with environment-in-the-loop reward</div> | |
| </a> | |
| <a class="link-card" href="https://colab.research.google.com/drive/1U1t9GVkc8sk2BeYCxoDnlHV1WMjYCpv1?usp=sharing" target="_blank"> | |
| <div class="lc-t">Evaluation Notebook</div> | |
| <div class="lc-d">Colab: base vs SFT vs GRPO head-to-head eval</div> | |
| </a> | |
| <a class="link-card" href="https://huggingface.co/jobs/yashvyasop/69ed7b02d70108f37acdf597" target="_blank"> | |
| <div class="lc-t">HF Training Logs</div> | |
| <div class="lc-d">Hugging Face training job telemetry</div> | |
| </a> | |
| </div> | |
| <h2>Training Pipeline</h2> | |
| <img src="/assets/Architectural_Diagram.png" alt="DesignGym architecture diagram" /> | |
| <p>End-to-end: OpenEnv environment → heuristic planner bootstraps SFT data → SFT adapter locks in the action interface → GRPO learns design preference from verifiable reward.</p> | |
| <h3>SFT: Teaching the Interface</h3> | |
| <p>Base Qwen 0.5B understands design language but cannot produce executable JSON actions. SFT on heuristic planner trajectories achieves <strong>0% → 100% valid JSON</strong> β a capability phase transition, not just a fine-tune.</p> | |
| <img src="/assets/SFT_plot_collage.png" alt="SFT training metrics" /> | |
| <h3>GRPO: Learning Preference</h3> | |
| <p>Once the model can act, GRPO teaches it <em>which</em> valid action is better. It samples multiple candidates, executes them in the environment, and increases probability of higher-reward actions. No reward model needed β the environment is the oracle.</p> | |
| <h3>Results from Training (Blog Table)</h3> | |
| <table style="width:100%;border-collapse:collapse;font-size:12px;font-family:var(--fm);margin:10px 0"> | |
| <thead><tr style="border-bottom:1px solid var(--bd)"><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Policy</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Final Score</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Instr Score</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Valid JSON</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Early Finalize</th></tr></thead> | |
| <tbody> | |
| <tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">Base Qwen 0.5B</td><td style="padding:8px">0.6948</td><td style="padding:8px">0.5360</td><td style="padding:8px;color:var(--rd)">0%</td><td style="padding:8px;color:var(--rd)">100%</td></tr> | |
| <tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">SFT Qwen 0.5B</td><td style="padding:8px;color:var(--gn)">0.7101</td><td style="padding:8px;color:var(--gn)">0.6263</td><td style="padding:8px;color:var(--gn)">100%</td><td style="padding:8px;color:var(--gn)">0%</td></tr> | |
| <tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">GRPO Qwen 0.5B</td><td style="padding:8px">0.6717</td><td style="padding:8px">0.5483</td><td style="padding:8px">98%</td><td style="padding:8px">67%</td></tr> | |
| <tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">GRPO Best-of-4</td><td style="padding:8px">0.6781</td><td style="padding:8px;color:var(--gn)">0.5817</td><td style="padding:8px;color:var(--gn)">100%</td><td style="padding:8px">17%</td></tr> | |
| </tbody> | |
| </table> | |
| <h2>How to Make It Better</h2> | |
| <ul> | |
| <li><strong>More GRPO budget:</strong> Current training was ~200 steps on free Colab T4. Papers show 1000-5000 steps with best-of-N=8 for significant RL lift.</li> | |
| <li><strong>Larger base model:</strong> Qwen 3B or 7B with 4-bit LoRA would better handle multi-metric reasoning while still fitting in 16GB.</li> | |
| <li><strong>Process reward model:</strong> Train a critic on partial trajectories to give GRPO denser signal than end-of-episode score.</li> | |
| <li><strong>Curriculum learning:</strong> Start GRPO on easy tasks (poster), then progress to hard (dense_flyer) β the agent currently trains on all tasks equally.</li> | |
| <li><strong>Reward alignment:</strong> GRPO's high total reward but lower final score suggests per-step reward doesn't fully correlate with episode quality. Tune the shaping function.</li> | |
| </ul> | |
| <h2>Environment</h2> | |
| <p>3 tasks (poster, editorial, dense flyer) testing different layout skills. Deterministic scoring via 7 computable aesthetic metrics. Fully OpenEnv-compatible: <code>reset()</code>, <code>step(action)</code>, typed Pydantic models, FastAPI server, Docker deployment.</p> | |
| </div> | |
| </div> | |
| <footer> | |
| <a href="https://github.com/canboyedits/DesignGym" target="_blank">GitHub</a> · | |
| <a href="https://huggingface.co/spaces/yashvyasop/DesignGym" target="_blank">HF Space</a> · | |
| <a href="/info" target="_blank">API /info</a> · | |
| <a href="/demo/backend_info" target="_blank">Backend Info</a> | |
| <div class="fenv" id="fenv"></div> | |
| </footer> | |
| <script> | |
| (function(){ | |
| /* ββ Safari compat: ES5 only, no ?. or ?? or => ββ */ | |
| var $ = function(id){ return document.getElementById(id); }; | |
| var fmt = function(n,d){ d=d||3; return (n==null||isNaN(+n))?'β':(+n).toFixed(d); }; | |
| /* ββ Cached benchmark results (seed=0, pre-computed on MPS M1) ββ */ | |
| var CACHED = {"heuristic:poster_basic_v1":{"summary":{"final_score":0.7697,"instruction_score":0.4988,"steps_taken":7,"total_reward":1.318,"wall_time_sec":0.0},"trajectory":[{"step":1,"action":"resize","reward":0.1256,"score":0.7257,"policy":"heuristic"},{"step":2,"action":"resize","reward":0.1792,"score":0.7361,"policy":"heuristic"},{"step":3,"action":"resize","reward":0.2552,"score":0.7491,"policy":"heuristic"},{"step":4,"action":"resize","reward":0.255,"score":0.7616,"policy":"heuristic"},{"step":5,"action":"resize","reward":0.2542,"score":0.772,"policy":"heuristic"},{"step":6,"action":"reflow_group","reward":0.1225,"score":0.7543,"policy":"heuristic"},{"step":7,"action":"reflow_group","reward":0.1263,"score":0.7697,"policy":"heuristic"}],"final_state":{"elements":[{"id":"title","bbox":[0.1138,0.0903,0.5,0.12],"role":"title"},{"id":"subtitle","bbox":[0.1138,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.1455,0.2862,0.734,0.4588],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7191,0.1033,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"heuristic:editorial_cover_v1":{"summary":{"final_score":0.7659,"instruction_score":0.5471,"steps_taken":9,"total_reward":1.558,"wall_time_sec":0.0},"trajectory":[{"step":1,"action":"resize","reward":0.1287,"score":0.8052,"policy":"heuristic"},{"step":2,"action":"resize","reward":0.175,"score":0.7825,"policy":"heuristic"},{"step":3,"action":"resize","reward":0.1752,"score":0.7832,"policy":"heuristic"},{"step":4,"action":"resize","reward":0.2507,"score":0.7859,"policy":"heuristic"},{"step":5,"action":"reflow_group","reward":0.2634,"score":0.7659,"policy":"heuristic"},{"step":6,"action":"reflow_group","reward":0.2539,"score":0.7815,"policy":"heuristic"},{"step":7,"action":"reflow_group","reward":0.0925,"score":0.7659,"policy":"heuristic"},{"step":8,"action":"reflow_group","reward":0.1264,"score":0.7815,"policy":"heuristic"},{"step":9,"action":"reflow_group","reward":0.0925,"score":0.7659,"policy":"heuristic"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1124,0.0693,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.0771,0.1713,0.8,0.46],"role":"image"},{"id":"headline_1","bbox":[0.1204,0.6266,0.54,0.1],"role":"title"},{"id":"headline_2","bbox":[0.1204,0.7548,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.1204,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"heuristic:dense_flyer_v1":{"summary":{"final_score":0.6547,"instruction_score":0.6301,"steps_taken":10,"total_reward":1.901,"wall_time_sec":0.0},"trajectory":[{"step":1,"action":"resize","reward":0.1253,"score":0.5824,"policy":"heuristic"},{"step":2,"action":"resize","reward":0.1906,"score":0.6214,"policy":"heuristic"},{"step":3,"action":"resize","reward":0.1759,"score":0.6238,"policy":"heuristic"},{"step":4,"action":"resize","reward":0.1768,"score":0.6263,"policy":"heuristic"},{"step":5,"action":"resize","reward":0.251,"score":0.6272,"policy":"heuristic"},{"step":6,"action":"resize","reward":0.2506,"score":0.5801,"policy":"heuristic"},{"step":7,"action":"resize","reward":0.264,"score":0.6315,"policy":"heuristic"},{"step":8,"action":"resize","reward":0.1714,"score":0.6405,"policy":"heuristic"},{"step":9,"action":"resize","reward":0.1708,"score":0.6485,"policy":"heuristic"},{"step":10,"action":"resize","reward":0.125,"score":0.6547,"policy":"heuristic"}],"final_state":{"elements":[{"id":"title","bbox":[0.091,0.0883,0.56,0.1],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6593,0.4027,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.0731,0.4201,0.72,0.24],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.722,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}},"base:poster_basic_v1":{"summary":{"final_score":0.7443,"instruction_score":0.5041,"steps_taken":7,"total_reward":1.425,"wall_time_sec":9.9},"trajectory":[{"step":1,"action":"resize","reward":0.1256,"score":0.7257,"policy":"local_base"},{"step":2,"action":"resize","reward":0.1792,"score":0.7361,"policy":"local_base"},{"step":3,"action":"reflow_group","reward":0.25,"score":0.7206,"policy":"local_base"},{"step":4,"action":"align","reward":0.2537,"score":0.7353,"policy":"local_base"},{"step":5,"action":"promote","reward":0.2511,"score":0.7385,"policy":"local_base"},{"step":6,"action":"move","reward":0.2414,"score":0.7402,"policy":"local_base"},{"step":7,"action":"resize","reward":0.1244,"score":0.7443,"policy":"local_base"}],"final_state":{"elements":[{"id":"title","bbox":[0.0938,0.0803,0.54,0.14],"role":"title"},{"id":"subtitle","bbox":[0.1138,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.1905,0.3012,0.654,0.4088],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7191,0.1033,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"base:editorial_cover_v1":{"summary":{"final_score":0.7485,"instruction_score":0.4837,"steps_taken":9,"total_reward":1.638,"wall_time_sec":10.0},"trajectory":[{"step":1,"action":"promote","reward":0.1225,"score":0.7711,"policy":"local_base"},{"step":2,"action":"resize","reward":0.1803,"score":0.7914,"policy":"local_base"},{"step":3,"action":"distribute","reward":0.1242,"score":0.7956,"policy":"local_base"},{"step":4,"action":"reflow_group","reward":0.2631,"score":0.7346,"policy":"local_base"},{"step":5,"action":"promote","reward":0.2543,"score":0.7517,"policy":"local_base"},{"step":6,"action":"distribute","reward":0.22,"score":0.7293,"policy":"local_base"},{"step":7,"action":"reflow_group","reward":0.1267,"score":0.746,"policy":"local_base"},{"step":8,"action":"promote","reward":0.22,"score":0.7296,"policy":"local_base"},{"step":9,"action":"resize","reward":0.1272,"score":0.7485,"policy":"local_base"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1124,0.0693,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.1121,0.1963,0.73,0.41],"role":"image"},{"id":"headline_1","bbox":[0.0604,0.3307,0.64,0.14],"role":"title"},{"id":"headline_2","bbox":[0.0804,0.6318,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.0804,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"base:dense_flyer_v1":{"summary":{"final_score":0.5918,"instruction_score":0.6159,"steps_taken":10,"total_reward":1.876,"wall_time_sec":15.4},"trajectory":[{"step":1,"action":"reflow_group","reward":0.1375,"score":0.5617,"policy":"local_base"},{"step":2,"action":"promote","reward":0.1774,"score":0.5715,"policy":"local_base"},{"step":3,"action":"align","reward":0.22,"score":0.56,"policy":"local_base"},{"step":4,"action":"resize","reward":0.1768,"score":0.5672,"policy":"local_base"},{"step":5,"action":"resize","reward":0.2517,"score":0.5742,"policy":"local_base"},{"step":6,"action":"align","reward":0.22,"score":0.5627,"policy":"local_base"},{"step":7,"action":"reflow_group","reward":0.2529,"score":0.5742,"policy":"local_base"},{"step":8,"action":"resize","reward":0.1785,"score":0.6021,"policy":"local_base"},{"step":9,"action":"resize","reward":0.1682,"score":0.6039,"policy":"local_base"},{"step":10,"action":"align","reward":0.0925,"score":0.5918,"policy":"local_base"}],"final_state":{"elements":[{"id":"title","bbox":[0.071,0.0783,0.6,0.12],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6593,0.4027,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.1731,0.4901,0.52,0.24],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.7082,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}},"sft:poster_basic_v1":{"summary":{"final_score":0.7709,"instruction_score":0.5176,"steps_taken":7,"total_reward":1.476,"wall_time_sec":9.9},"trajectory":[{"step":1,"action":"reflow_group","reward":0.1675,"score":0.7036,"policy":"finetuned_sft"},{"step":2,"action":"align","reward":0.2536,"score":0.718,"policy":"finetuned_sft"},{"step":3,"action":"anchor_to_region","reward":0.1984,"score":0.7481,"policy":"finetuned_sft"},{"step":4,"action":"promote","reward":0.25,"score":0.7105,"policy":"finetuned_sft"},{"step":5,"action":"align","reward":0.254,"score":0.7264,"policy":"finetuned_sft"},{"step":6,"action":"move","reward":0.2108,"score":0.7094,"policy":"finetuned_sft"},{"step":7,"action":"resize","reward":0.1413,"score":0.7709,"policy":"finetuned_sft"}],"final_state":{"elements":[{"id":"title","bbox":[0.0938,0.0803,0.54,0.14],"role":"title"},{"id":"subtitle","bbox":[0.0938,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.2205,0.3212,0.596,0.3725],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7244,0.0528,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"sft:editorial_cover_v1":{"summary":{"final_score":0.7491,"instruction_score":0.4724,"steps_taken":9,"total_reward":1.999,"wall_time_sec":13.5},"trajectory":[{"step":1,"action":"anchor_to_region","reward":0.1798,"score":0.796,"policy":"finetuned_sft"},{"step":2,"action":"anchor_to_region","reward":0.22,"score":0.7801,"policy":"finetuned_sft"},{"step":3,"action":"align","reward":0.2527,"score":0.7908,"policy":"finetuned_sft"},{"step":4,"action":"distribute","reward":0.25,"score":0.7559,"policy":"finetuned_sft"},{"step":5,"action":"resize","reward":0.2593,"score":0.7931,"policy":"finetuned_sft"},{"step":6,"action":"promote","reward":0.2506,"score":0.7953,"policy":"finetuned_sft"},{"step":7,"action":"move","reward":0.21,"score":0.7559,"policy":"finetuned_sft"},{"step":8,"action":"align","reward":0.2538,"score":0.7705,"policy":"finetuned_sft"},{"step":9,"action":"resize","reward":0.1225,"score":0.7491,"policy":"finetuned_sft"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1004,0.0728,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.1221,0.1863,0.73,0.41],"role":"image"},{"id":"headline_1","bbox":[0.1004,0.4029,0.58,0.12],"role":"title"},{"id":"headline_2","bbox":[0.1004,0.7529,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.2492,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"sft:dense_flyer_v1":{"summary":{"final_score":0.6172,"instruction_score":0.6241,"steps_taken":10,"total_reward":2.055,"wall_time_sec":18.3},"trajectory":[{"step":1,"action":"resize","reward":0.1253,"score":0.5824,"policy":"finetuned_sft"},{"step":2,"action":"align","reward":0.22,"score":0.5689,"policy":"finetuned_sft"},{"step":3,"action":"resize","reward":0.177,"score":0.577,"policy":"finetuned_sft"},{"step":4,"action":"resize","reward":0.1887,"score":0.6132,"policy":"finetuned_sft"},{"step":5,"action":"resize","reward":0.2511,"score":0.615,"policy":"finetuned_sft"},{"step":6,"action":"align","reward":0.22,"score":0.6027,"policy":"finetuned_sft"},{"step":7,"action":"reflow_group","reward":0.2531,"score":0.615,"policy":"finetuned_sft"},{"step":8,"action":"promote","reward":0.25,"score":0.5934,"policy":"finetuned_sft"},{"step":9,"action":"align","reward":0.253,"score":0.6055,"policy":"finetuned_sft"},{"step":10,"action":"move","reward":0.117,"score":0.6172,"policy":"finetuned_sft"}],"final_state":{"elements":[{"id":"title","bbox":[0.081,0.0683,0.6,0.12],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6593,0.4027,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.1531,0.4851,0.56,0.24],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.7082,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}},"grpo:poster_basic_v1":{"summary":{"final_score":0.7294,"instruction_score":0.4981,"steps_taken":7,"total_reward":1.488,"wall_time_sec":9.9},"trajectory":[{"step":1,"action":"reflow_group","reward":0.1675,"score":0.7036,"policy":"finetuned_grpo"},{"step":2,"action":"reflow_group","reward":0.2536,"score":0.718,"policy":"finetuned_grpo"},{"step":3,"action":"align","reward":0.22,"score":0.7036,"policy":"finetuned_grpo"},{"step":4,"action":"promote","reward":0.2547,"score":0.721,"policy":"finetuned_grpo"},{"step":5,"action":"move","reward":0.2108,"score":0.6999,"policy":"finetuned_grpo"},{"step":6,"action":"align","reward":0.254,"score":0.7159,"policy":"finetuned_grpo"},{"step":7,"action":"resize","reward":0.1271,"score":0.7294,"policy":"finetuned_grpo"}],"final_state":{"elements":[{"id":"title","bbox":[0.0938,0.0803,0.54,0.14],"role":"title"},{"id":"subtitle","bbox":[0.0938,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.2205,0.3212,0.596,0.3725],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7191,0.1033,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"grpo:editorial_cover_v1":{"summary":{"final_score":0.7491,"instruction_score":0.4724,"steps_taken":9,"total_reward":1.999,"wall_time_sec":10.0},"trajectory":[{"step":1,"action":"anchor_to_region","reward":0.1798,"score":0.796,"policy":"finetuned_grpo"},{"step":2,"action":"anchor_to_region","reward":0.22,"score":0.7801,"policy":"finetuned_grpo"},{"step":3,"action":"align","reward":0.2527,"score":0.7908,"policy":"finetuned_grpo"},{"step":4,"action":"distribute","reward":0.25,"score":0.7559,"policy":"finetuned_grpo"},{"step":5,"action":"resize","reward":0.2593,"score":0.7931,"policy":"finetuned_grpo"},{"step":6,"action":"promote","reward":0.2506,"score":0.7953,"policy":"finetuned_grpo"},{"step":7,"action":"move","reward":0.21,"score":0.7559,"policy":"finetuned_grpo"},{"step":8,"action":"align","reward":0.2538,"score":0.7705,"policy":"finetuned_grpo"},{"step":9,"action":"resize","reward":0.1225,"score":0.7491,"policy":"finetuned_grpo"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1004,0.0728,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.1221,0.1863,0.73,0.41],"role":"image"},{"id":"headline_1","bbox":[0.1004,0.4029,0.58,0.12],"role":"title"},{"id":"headline_2","bbox":[0.1004,0.7529,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.2492,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"grpo:dense_flyer_v1":{"summary":{"final_score":0.6204,"instruction_score":0.7181,"steps_taken":10,"total_reward":2.174,"wall_time_sec":15.8},"trajectory":[{"step":1,"action":"align","reward":0.1375,"score":0.5617,"policy":"finetuned_grpo"},{"step":2,"action":"anchor_to_region","reward":0.2952,"score":0.6324,"policy":"finetuned_grpo"},{"step":3,"action":"promote","reward":0.175,"score":0.6136,"policy":"finetuned_grpo"},{"step":4,"action":"align","reward":0.2531,"score":0.6261,"policy":"finetuned_grpo"},{"step":5,"action":"reflow_group","reward":0.22,"score":0.6136,"policy":"finetuned_grpo"},{"step":6,"action":"promote","reward":0.2518,"score":0.6207,"policy":"finetuned_grpo"},{"step":7,"action":"move","reward":0.2432,"score":0.6284,"policy":"finetuned_grpo"},{"step":8,"action":"align","reward":0.22,"score":0.6158,"policy":"finetuned_grpo"},{"step":9,"action":"promote","reward":0.2554,"score":0.635,"policy":"finetuned_grpo"},{"step":10,"action":"resize","reward":0.1225,"score":0.6204,"policy":"finetuned_grpo"}],"final_state":{"elements":[{"id":"title","bbox":[0.041,0.0483,0.68,0.14],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6392,0.6892,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.1831,0.5151,0.5,0.19],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.7082,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}}}; | |
| /* ββ Tabs ββ */ | |
| var tabs = document.querySelectorAll('.tab-btn'); | |
| var pages = document.querySelectorAll('.tab-page'); | |
| function switchToTab(tabName){ | |
| for(var j=0;j<tabs.length;j++){ | |
| tabs[j].classList.remove('active'); | |
| pages[j].classList.remove('active'); | |
| } | |
| for(var k=0;k<tabs.length;k++){ | |
| if(tabs[k].getAttribute('data-tab')===tabName){ | |
| tabs[k].classList.add('active'); | |
| break; | |
| } | |
| } | |
| var pg = document.getElementById('page-'+tabName); | |
| if(pg) pg.classList.add('active'); | |
| } | |
| for(var i=0;i<tabs.length;i++){ | |
| tabs[i].addEventListener('click', function(e){ | |
| switchToTab(e.currentTarget.getAttribute('data-tab')); | |
| }); | |
| } | |
| var backendInfo = {}; | |
| var pollTimer = null; | |
| /* ββ Backend ββ */ | |
| function fetchBI(){ | |
| return fetch('/demo/backend_info').then(function(r){ | |
| if(!r.ok) throw new Error('HTTP '+r.status); | |
| return r.json(); | |
| }).then(function(info){ | |
| backendInfo = info; | |
| renderBC(info); | |
| return info; | |
| })['catch'](function(){ | |
| $('bl').textContent='Backend unavailable'; | |
| $('bd').className='dot dot-r'; | |
| }); | |
| } | |
| function renderBC(info){ | |
| var d=$('bd'),l=$('bl'),det=$('bdet'); | |
| var b = (info && info.backend) ? info.backend : 'unknown'; | |
| if(b==='local-lora'){ | |
| if(info.loading && !info.ready){ | |
| d.className='dot dot-a'; | |
| l.innerHTML='<span class="spinner"></span>Loading '+(info.adapter_key||'sft').toUpperCase()+' adapterβ¦'; | |
| det.textContent='Model warming up in background. Will auto-refresh.'; | |
| startPoll(); | |
| } else if(info.ready){ | |
| d.className='dot dot-g'; | |
| l.textContent='Fine-tuned '+(info.adapter_key||'').toUpperCase()+' \xb7 Qwen2.5-0.5B + LoRA ('+(info.device||'cpu')+')'; | |
| det.textContent='Loaded in '+(info.load_seconds||'?')+'s'; | |
| stopPoll(); | |
| } else { | |
| d.className='dot dot-m'; | |
| l.textContent='Local LoRA (not yet loaded)'; | |
| det.textContent='Loads on first request'; | |
| } | |
| } else if(b==='local-base'){ | |
| d.className='dot dot-a'; | |
| l.textContent='Base Qwen2.5-0.5B ('+(info.device||'cpu')+', no adapter)'; | |
| det.textContent='Running without LoRA fine-tuning'; | |
| stopPoll(); | |
| } else if(b==='router'){ | |
| d.className='dot dot-a'; | |
| l.textContent='Base Qwen2.5-0.5B (HF Router) β NOT fine-tuned'; | |
| det.textContent='Using HF Inference Router. Base model only.'; | |
| stopPoll(); | |
| } else if(info && info.load_error){ | |
| d.className='dot dot-r'; | |
| l.textContent='Load error'; | |
| det.textContent=info.load_error; | |
| stopPoll(); | |
| } else { | |
| d.className='dot dot-m'; | |
| l.textContent='No LLM backend'; | |
| det.textContent='Heuristic only'; | |
| stopPoll(); | |
| } | |
| var ll=$('llm-lbl'), ld=$('llm-desc'); | |
| if(b==='local-lora'){ | |
| var ak=(info.adapter_key||'SFT').toUpperCase(); | |
| ll.textContent=ak+' LoRA Picker'; | |
| ld.textContent='Fine-tuned '+ak+' model chooses layout actions on '+(info.device||'cpu'); | |
| } else if(b==='local-base'){ | |
| ll.textContent='Base LLM Picker'; | |
| ld.textContent='Base model, no adapter β the control experiment'; | |
| } else if(b==='router'){ | |
| ll.textContent='Router LLM (base)'; | |
| ld.textContent='HF Router β NOT fine-tuned'; | |
| } else { | |
| ll.textContent='LLM Picker (unavailable)'; | |
| ld.textContent='No model loaded'; | |
| } | |
| var env = (info && info.env) ? info.env : {}; | |
| $('fenv').textContent='BACKEND='+(env.DESIGNGYM_BACKEND||'?')+' ADAPTER='+(env.DESIGNGYM_ADAPTER||'?')+' HF_TOKEN='+(env.HF_TOKEN_present?'set':'unset'); | |
| } | |
| function startPoll(){if(!pollTimer) pollTimer=setInterval(fetchBI,3000);} | |
| function stopPoll(){if(pollTimer){clearInterval(pollTimer);pollTimer=null;}} | |
| /* ββ Adapter switch ββ */ | |
| $('asw').addEventListener('change',function(){ | |
| var key=this.value; | |
| $('bl').innerHTML='<span class="spinner"></span>Switching to '+key.toUpperCase()+'β¦'; | |
| $('bd').className='dot dot-a'; | |
| fetch('/demo/switch_adapter',{ | |
| method:'POST', | |
| headers:{'Content-Type':'application/json'}, | |
| body:JSON.stringify({adapter:key}) | |
| }).then(function(r){return r.json();}).then(function(d){ | |
| if(d.error) throw new Error(d.error); | |
| fetchBI(); | |
| })['catch'](function(e){ | |
| $('bl').textContent='Switch failed: '+e.message; | |
| $('bd').className='dot dot-r'; | |
| }); | |
| }); | |
| /* ββ Canvas ββ */ | |
| var cColors={title:'#bfdbfe',subtitle:'#dbeafe',image:'#bbf7d0',cta:'#fecaca',logo:'#fde68a',badge:'#ddd6fe',body:'#e2e8f0',caption:'#fef3c7',shape:'#ddd6fe',masthead:'#fed7aa',headline:'#a7f3d0'}; | |
| function drawState(state){ | |
| var c=$('canvas'); c.innerHTML=''; | |
| var ns='http://www.w3.org/2000/svg'; | |
| var bg=document.createElementNS(ns,'rect'); | |
| bg.setAttribute('x',0);bg.setAttribute('y',0);bg.setAttribute('width',800);bg.setAttribute('height',1000);bg.setAttribute('fill','#f8fafc'); | |
| c.appendChild(bg); | |
| var els=(state&&state.elements)?state.elements:[]; | |
| if(!els.length){ | |
| var t=document.createElementNS(ns,'text');t.setAttribute('x',40);t.setAttribute('y',60);t.setAttribute('fill','#0f172a');t.setAttribute('font-size','18');t.textContent='Click Run to start an episode.';c.appendChild(t);return; | |
| } | |
| for(var i=0;i<els.length;i++){ | |
| var el=els[i],b=el.bbox||el.box;if(!b||b.length<4)continue; | |
| var x=b[0]*800,y=b[1]*1000,w=b[2]*800,h=b[3]*1000; | |
| var r=document.createElementNS(ns,'rect'); | |
| r.setAttribute('x',x);r.setAttribute('y',y);r.setAttribute('width',w);r.setAttribute('height',h); | |
| r.setAttribute('rx',6);r.setAttribute('fill',cColors[el.role||el.type]||'#e5e7eb'); | |
| r.setAttribute('stroke','#0f172a');r.setAttribute('stroke-width',1.5);c.appendChild(r); | |
| var lb=document.createElementNS(ns,'text'); | |
| lb.setAttribute('x',x+6);lb.setAttribute('y',y+18);lb.setAttribute('fill','#0f172a');lb.setAttribute('font-size','13'); | |
| lb.setAttribute('font-family','JetBrains Mono,monospace');lb.textContent=el.id||'el';c.appendChild(lb); | |
| } | |
| } | |
| /* ββ Scores ββ */ | |
| function renderScores(s,st){ | |
| $('ms').textContent=fmt(s&&s.final_score!=null?s.final_score:(st?st.current_score:null)); | |
| $('mi').textContent=fmt(s&&s.instruction_score!=null?s.instruction_score:(st?st.instruction_score:null)); | |
| $('mst').textContent=''+(s?s.steps_taken||0:(st?st.step_count||0:0)); | |
| $('mr').textContent=fmt(s?s.total_reward||0:0,2); | |
| } | |
| /* ββ Trajectory ββ */ | |
| function polBadge(tag){ | |
| if(!tag) return '<span class="bg bg-h">?</span>'; | |
| if(tag.indexOf('finetuned_')===0) return '<span class="bg bg-ft">'+tag+'</span>'; | |
| if(tag==='heuristic') return '<span class="bg bg-h">heuristic</span>'; | |
| if(tag==='router_base') return '<span class="bg bg-rt">router_base</span>'; | |
| if(tag==='local_base') return '<span class="bg bg-b">local_base</span>'; | |
| if(tag.indexOf('fallback')>=0) return '<span class="bg bg-fb">'+tag+'</span>'; | |
| return '<span class="bg bg-h">'+tag+'</span>'; | |
| } | |
| function rwCls(r){return r>0.001?'rp':r<-0.001?'rn':'rz';} | |
| function renderTraj(traj){ | |
| var b=$('tb');b.innerHTML=''; | |
| for(var i=0;i<traj.length;i++){ | |
| var t=traj[i],tr=document.createElement('tr'); | |
| tr.innerHTML='<td>'+t.step+'</td><td>'+(t.action||'')+'</td><td class="'+rwCls(t.reward)+'">'+fmt(t.reward,3)+'</td><td>'+fmt(t.score,3)+'</td><td>'+polBadge(t.policy)+'</td>'; | |
| b.appendChild(tr); | |
| } | |
| $('tc').textContent=traj.length?'\xb7 '+traj.length+' step'+(traj.length===1?'':'s'):'β no steps yet'; | |
| } | |
| /* ββ Controls ββ */ | |
| function setSts(text,cls){var s=$('sts');s.innerHTML=text;s.className='sts '+(cls||'');} | |
| function selPol(){var r=document.querySelector('input[name="pol"]:checked');return r?r.value:'heuristic';} | |
| function selMode(){var r=document.querySelector('input[name="runmode"]:checked');return r?r.value:'cached';} | |
| /* update Run button label when mode changes */ | |
| var modeRadios = document.querySelectorAll('input[name="runmode"]'); | |
| for(var mi=0;mi<modeRadios.length;mi++){ | |
| modeRadios[mi].addEventListener('change', function(){ | |
| var m = selMode(); | |
| $('run').textContent = m==='cached' ? 'Show Cached Result' : 'Run Live Episode'; | |
| }); | |
| } | |
| function resetEnv(){ | |
| setSts('<span class="spinner"></span>Resettingβ¦','run'); | |
| return fetch('/demo/reset',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({task_id:$('task').value,seed:0})}).then(function(r){ | |
| if(!r.ok) throw new Error('reset '+r.status);return r.json(); | |
| }).then(function(d){ | |
| drawState(d.state);renderScores({steps_taken:0,total_reward:0},d.state);renderTraj([]);$('rj').textContent=''; | |
| setSts('Ready: '+$('task').value+'. Select mode and click Run.',''); | |
| })['catch'](function(e){setSts('Reset error: '+e.message,'err');}); | |
| } | |
| /* ββ Elapsed timer & wait banner ββ */ | |
| var _elapsedTimer = null; | |
| var _elapsedStart = 0; | |
| function showWaitBanner(){ | |
| _elapsedStart = Date.now(); | |
| $('wait-banner').className = 'wait-banner show'; | |
| $('wb-timer').textContent = '0s'; | |
| _elapsedTimer = setInterval(function(){ | |
| var sec = Math.round((Date.now() - _elapsedStart) / 1000); | |
| $('wb-timer').textContent = sec + 's'; | |
| }, 1000); | |
| } | |
| function hideWaitBanner(){ | |
| $('wait-banner').className = 'wait-banner'; | |
| if(_elapsedTimer){ clearInterval(_elapsedTimer); _elapsedTimer = null; } | |
| } | |
| /* nav buttons in wait banner and teaser */ | |
| if($('wb-bench')) $('wb-bench').addEventListener('click', function(){ switchToTab('bench'); }); | |
| if($('wb-about')) $('wb-about').addEventListener('click', function(){ switchToTab('about'); }); | |
| if($('go-bench')) $('go-bench').addEventListener('click', function(){ switchToTab('bench'); }); | |
| if($('go-blog')) $('go-blog').addEventListener('click', function(){ | |
| window.open('https://huggingface.co/spaces/yashvyasop/DesignGym/blob/main/Blog.md','_blank'); | |
| }); | |
| /* ββ Cached result lookup ββ */ | |
| function getCacheKey(){ | |
| var pol = selPol(); | |
| var task = $('task').value; | |
| if(pol === 'heuristic') return 'heuristic:' + task; | |
| var adapter = $('asw').value || 'sft'; | |
| return adapter + ':' + task; | |
| } | |
| function showCachedResult(){ | |
| var key = getCacheKey(); | |
| var entry = CACHED[key]; | |
| if(!entry){ | |
| setSts('No cached result for ' + key + '. Try Run Live instead.','err'); | |
| return; | |
| } | |
| var s = entry.summary; | |
| var traj = entry.trajectory; | |
| var fs = entry.final_state; | |
| renderScores(s, null); | |
| renderTraj(traj); | |
| if(fs) drawState(fs); | |
| $('rj').textContent = JSON.stringify(s, null, 2); | |
| var src = '<span class="src-badge src-cached">CACHED</span> Pre-computed on MPS (M1) \xb7 seed=0 \xb7 deterministic \xb7 '; | |
| src += s.steps_taken + ' steps \xb7 score ' + fmt(s.final_score); | |
| if(s.wall_time_sec > 0) src += ' \xb7 originally ran in ' + s.wall_time_sec + 's'; | |
| setSts(src, 'ok'); | |
| } | |
| /* ββ Live run ββ */ | |
| function runLiveEpisode(){ | |
| var pol=selPol(); | |
| var polName=pol==='heuristic'?'Heuristic':((backendInfo.adapter_key||'SFT').toUpperCase()+' LoRA'); | |
| var isLLM = pol !== 'heuristic'; | |
| var timeHint = isLLM ? '~1β1.5 min on CPU' : '~1s'; | |
| setSts('<span class="spinner"></span>Running '+polName+' live⦠('+timeHint+')','run'); | |
| $('run').disabled=true;$('rst').disabled=true; | |
| if(isLLM) showWaitBanner(); | |
| fetch('/demo/run_episode',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({policy:pol,task_id:$('task').value,seed:0})}).then(function(r){ | |
| if(!r.ok) return r.text().then(function(t){throw new Error('HTTP '+r.status+' '+t.substring(0,200));}); | |
| return r.json(); | |
| }).then(function(d){ | |
| if(d.error) throw new Error(d.error); | |
| hideWaitBanner(); | |
| switchToTab('demo'); | |
| drawState(d.final_state); | |
| renderScores(d.summary,d.final_state); | |
| renderTraj(d.trajectory||[]); | |
| $('rj').textContent=JSON.stringify(d.summary,null,2); | |
| var src = '<span class="src-badge src-live">LIVE</span> '; | |
| setSts(src+'Done in '+(d.summary.wall_time_sec||'?')+'s β '+(d.summary.steps_taken||0)+' steps \xb7 score '+fmt(d.summary.final_score),'ok'); | |
| })['catch'](function(e){hideWaitBanner();setSts('Error: '+e.message,'err');}) | |
| ['finally'](function(){$('run').disabled=false;$('rst').disabled=false;}); | |
| } | |
| /* ββ Main run handler ββ */ | |
| function handleRun(){ | |
| var mode = selMode(); | |
| if(mode === 'cached'){ | |
| showCachedResult(); | |
| } else { | |
| runLiveEpisode(); | |
| } | |
| } | |
| $('run').addEventListener('click', handleRun); | |
| $('rst').addEventListener('click',resetEnv); | |
| $('task').addEventListener('change',resetEnv); | |
| /* ββ Init ββ */ | |
| fetchBI(); | |
| resetEnv(); | |
| })(); | |
| </script> | |
| </body> | |
| </html> | |