DesignGym / web /index.html
yashvyasop's picture
Upload folder using huggingface_hub
1e65f1e verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>DesignGym 2.0 β€” Live Demo</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="preconnect" href="https://fonts.googleapis.com" />
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
<link href="https://fonts.googleapis.com/css2?family=Fraunces:opsz,wght@9..144,400;9..144,700&family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet" />
<style>
:root{
--bg:#0f1117;--s1:#181b25;--s2:#1f2330;--s3:#262a38;
--bd:rgba(255,255,255,.08);--bd2:rgba(255,255,255,.14);
--tx:#e8ecf4;--mt:#8892a6;--ac:#5b9cf5;--gn:#3dd68c;
--am:#f0b429;--rd:#ef6461;
--ff:'Inter',system-ui,sans-serif;
--fm:'JetBrains Mono',ui-monospace,monospace;
--fd:'Fraunces',Georgia,serif;
}
*,*::before,*::after{box-sizing:border-box;margin:0}
body{font-family:var(--ff);background:var(--bg);color:var(--tx);line-height:1.5;min-height:100vh}
a{color:var(--ac);text-decoration:none}a:hover{text-decoration:underline}
/* ── NAV TABS ── */
.tab-bar{display:flex;gap:6px;padding:10px 20px;background:var(--s1);border-bottom:1px solid var(--bd);position:sticky;top:0;z-index:90;align-items:center}
.tab-bar-title{font-family:var(--fd);font-size:16px;font-weight:700;margin-right:auto;white-space:nowrap}
.tab-btn{display:inline-flex;align-items:center;gap:6px;padding:8px 18px;font-family:var(--ff);font-size:13px;font-weight:600;color:var(--mt);background:var(--s2);border:1px solid var(--bd2);border-radius:999px;cursor:pointer;transition:all .15s;white-space:nowrap}
.tab-btn:hover{color:var(--tx);background:var(--s3);border-color:var(--ac)}
.tab-btn.active{color:#fff;background:var(--ac);border-color:var(--ac)}
.tab-ico{font-size:15px;line-height:1}
.blog-link{display:inline-flex;align-items:center;gap:6px;padding:8px 18px;font-family:var(--ff);font-size:13px;font-weight:600;color:var(--mt);background:var(--s2);border:1px solid var(--bd2);border-radius:999px;cursor:pointer;transition:all .15s;white-space:nowrap;text-decoration:none}
.blog-link:hover{color:#fff;background:#d4629a;border-color:#d4629a;text-decoration:none}
.tab-page{display:none;padding:0}.tab-page.active{display:block}
/* ── HEADER ── */
.hdr{padding:20px 24px;border-bottom:1px solid var(--bd)}
.hdr h1{font-family:var(--fd);font-size:22px;font-weight:700}
.hdr-sub{color:var(--mt);font-size:12px;margin-top:4px}
/* ── BACKEND CARD ── */
.bc{margin:16px 24px;background:var(--s1);border:1px solid var(--bd2);border-radius:12px;padding:14px 18px;display:flex;align-items:center;gap:14px;flex-wrap:wrap}
.dot{width:10px;height:10px;border-radius:50%;flex-shrink:0}
.dot-g{background:var(--gn);box-shadow:0 0 6px var(--gn)}.dot-a{background:var(--am);box-shadow:0 0 6px var(--am)}.dot-r{background:var(--rd);box-shadow:0 0 6px var(--rd)}.dot-m{background:var(--mt)}
.bc-info{flex:1;min-width:200px}
.bc-lbl{font-family:var(--fm);font-size:13px;font-weight:500}
.bc-det{color:var(--mt);font-size:11px;margin-top:2px}
.sw{display:flex;align-items:center;gap:6px}
.sw label{font-size:10px;color:var(--mt);text-transform:uppercase;letter-spacing:.5px}
.sw select{font-family:var(--fm);font-size:12px;padding:5px 8px;border-radius:6px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx)}
/* ── PANELS ── */
.grid2{display:grid;grid-template-columns:320px 1fr;gap:16px;padding:16px 24px}
@media(max-width:860px){.grid2{grid-template-columns:1fr}}
.pnl{background:var(--s1);border:1px solid var(--bd);border-radius:12px;padding:16px}
.pnl h2{font-size:10px;text-transform:uppercase;letter-spacing:.6px;color:var(--mt);margin:14px 0 6px}.pnl h2:first-child{margin-top:0}
/* controls */
.sel{width:100%;font-family:var(--fm);font-size:12px;padding:8px 10px;border-radius:8px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx);-webkit-appearance:none;appearance:none}
.pol-pick{display:flex;flex-direction:column;gap:6px}
.pol-c{display:flex;gap:8px;align-items:flex-start;padding:8px 10px;border:1px solid var(--bd2);border-radius:8px;background:var(--s2);cursor:pointer;transition:border-color .12s}
.pol-c:hover{border-color:var(--ac)}.pol-c input{margin-top:3px}
.pol-c div{display:flex;flex-direction:column}.pol-c strong{font-size:12px}.pol-c .desc{color:var(--mt);font-size:10px;line-height:1.4}
.help-tip{display:block;color:var(--mt);font-size:10px;margin-top:3px;font-style:italic}
.acts{display:flex;gap:6px;margin-top:4px}
button{font-family:var(--ff);font-size:12px;padding:9px 12px;border-radius:8px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx);cursor:pointer;transition:border-color .12s,background .12s}
button:hover{border-color:var(--ac)}button:disabled{opacity:.35;cursor:not-allowed}
button.pri{background:var(--ac);border-color:transparent;color:#0f1117;font-weight:600;flex:1;min-width:120px}
button.pri:hover{background:#7ab4f7}
button.sec{background:transparent}
.sts{margin-top:10px;font-size:11px;font-family:var(--fm);padding:7px 9px;border-radius:6px;background:rgba(255,255,255,.03);border:1px dashed var(--bd2);min-height:32px}
.sts.run{border-style:solid;border-color:var(--ac);color:var(--ac)}
.sts.ok{border-style:solid;border-color:var(--gn);color:var(--gn)}
.sts.err{border-style:solid;border-color:var(--rd);color:var(--rd)}
/* while-you-wait banner */
.wait-banner{display:none;margin:12px 24px;padding:14px 18px;background:linear-gradient(135deg,rgba(91,156,245,.08),rgba(212,98,154,.08));border:1px solid rgba(91,156,245,.25);border-radius:12px;animation:fadeIn .3s ease}
.wait-banner.show{display:block}
.wait-banner .wb-hd{font-size:13px;font-weight:600;margin-bottom:6px;color:var(--tx)}
.wait-banner .wb-timer{font-family:var(--fm);font-size:22px;font-weight:600;color:var(--ac);margin:6px 0}
.wait-banner .wb-links{display:flex;gap:8px;margin-top:8px;flex-wrap:wrap}
.wait-banner .wb-btn{display:inline-flex;align-items:center;gap:5px;padding:7px 14px;font-size:12px;font-weight:600;border-radius:8px;border:1px solid var(--bd2);background:var(--s2);color:var(--tx);cursor:pointer;transition:all .15s;text-decoration:none}
.wait-banner .wb-btn:hover{border-color:var(--ac);background:var(--s3);text-decoration:none}
.wait-banner .wb-sub{color:var(--mt);font-size:11px;margin-top:4px}
@keyframes fadeIn{from{opacity:0;transform:translateY(-6px)}to{opacity:1;transform:translateY(0)}}
/* quick-results teaser */
.teaser{margin:0 24px;padding:10px 16px;background:var(--s1);border:1px solid var(--bd);border-radius:8px;font-size:12px;color:var(--mt);display:flex;align-items:center;gap:8px;flex-wrap:wrap}
.teaser strong{color:var(--tx)}
.teaser .t-link{color:var(--ac);cursor:pointer;font-weight:600;text-decoration:underline}
/* mode toggle */
.mode-toggle{display:flex;flex-direction:column;gap:6px}
.mode-opt{display:flex;gap:8px;align-items:flex-start;padding:8px 10px;border:1px solid var(--bd2);border-radius:8px;background:var(--s2);cursor:pointer;transition:border-color .12s}
.mode-opt:hover{border-color:var(--ac)}.mode-opt input{margin-top:3px}
.mode-opt div{display:flex;flex-direction:column}.mode-opt strong{font-size:12px}.mode-opt .desc{color:var(--mt);font-size:10px;line-height:1.4}
/* source badge shown on cached results */
.src-badge{display:inline-block;font-size:10px;padding:3px 10px;border-radius:5px;font-weight:500;margin:6px 0}
.src-cached{background:rgba(91,156,245,.12);color:var(--ac)}
.src-live{background:rgba(61,214,140,.12);color:var(--gn)}
/* loading spinner */
@keyframes spin{to{transform:rotate(360deg)}}
.spinner{display:inline-block;width:14px;height:14px;border:2px solid var(--bd2);border-top-color:var(--ac);border-radius:50%;animation:spin .7s linear infinite;vertical-align:middle;margin-right:6px}
/* metrics */
.mrow{display:grid;grid-template-columns:repeat(4,1fr);gap:8px;margin-top:10px}
@media(max-width:600px){.mrow{grid-template-columns:repeat(2,1fr)}}
.mc{background:var(--s2);border:1px solid var(--bd);border-radius:8px;padding:10px;display:flex;flex-direction:column;gap:1px}
.mc .ml{font-size:9px;text-transform:uppercase;letter-spacing:.5px;color:var(--mt)}
.mc .mv{font-family:var(--fm);font-size:20px;font-weight:500;font-feature-settings:"tnum"}
.mc.hi{border-color:rgba(91,156,245,.3)}.mc.hi .mv{color:var(--ac)}
/* canvas */
#canvas{width:100%;max-height:52vh;background:#f8fafc;border-radius:8px;margin-top:10px}
/* trajectory */
.trj{margin:0 24px 20px;background:var(--s1);border:1px solid var(--bd);border-radius:12px;padding:16px}
.trj summary{cursor:pointer;font-size:13px;font-weight:600;padding:2px 0}.trj summary:hover{color:var(--ac)}
.trj-ct{color:var(--mt);font-weight:400;font-size:11px}
table.t{width:100%;border-collapse:collapse;font-size:11px;font-family:var(--fm);margin-top:8px}
table.t th,table.t td{padding:6px 8px;border-bottom:1px solid var(--bd);text-align:left}
table.t th{font-size:9px;text-transform:uppercase;letter-spacing:.5px;color:var(--mt);font-weight:500}
.rp{color:var(--gn)}.rn{color:var(--rd)}.rz{color:var(--mt)}
.bg{display:inline-block;font-size:9px;padding:2px 6px;border-radius:5px;font-weight:500;letter-spacing:.3px}
.bg-ft{background:rgba(61,214,140,.15);color:var(--gn)}.bg-h{background:rgba(136,146,166,.15);color:var(--mt)}
.bg-rt{background:rgba(240,180,41,.15);color:var(--am)}.bg-b{background:rgba(240,180,41,.15);color:var(--am)}
.bg-fb{background:rgba(239,100,97,.15);color:var(--rd)}
pre.raw{white-space:pre-wrap;word-break:break-word;background:var(--s2);border-radius:6px;padding:8px;font-size:10px;color:#9ca8be;max-height:260px;overflow:auto;margin-top:6px}
/* ── BENCHMARK TAB ── */
.bm{padding:20px 24px;max-width:900px}
.bm h2{font-family:var(--fd);font-size:18px;margin:24px 0 8px;color:var(--tx)}.bm h2:first-child{margin-top:0}
.bm p{color:var(--mt);font-size:13px;line-height:1.6;margin:6px 0}
.bm table{width:100%;border-collapse:collapse;font-size:12px;font-family:var(--fm);margin:10px 0 16px}
.bm th,.bm td{padding:8px 10px;border-bottom:1px solid var(--bd);text-align:left}
.bm th{font-size:10px;text-transform:uppercase;color:var(--mt);font-weight:500}
.bm .win{color:var(--gn);font-weight:600}
.bm .tag{display:inline-block;font-size:10px;padding:2px 8px;border-radius:4px;font-weight:500}
.bm .tag-g{background:rgba(61,214,140,.12);color:var(--gn)}
.bm .tag-a{background:rgba(240,180,41,.12);color:var(--am)}
.bm .tag-r{background:rgba(239,100,97,.12);color:var(--rd)}
.bm blockquote{border-left:3px solid var(--ac);padding:8px 14px;margin:10px 0;background:rgba(91,156,245,.06);border-radius:0 6px 6px 0;font-size:12px;color:var(--mt)}
.bm ul{color:var(--mt);font-size:13px;padding-left:20px;margin:6px 0}
.bm li{margin:4px 0}
.bm code{font-family:var(--fm);font-size:11px;background:var(--s2);padding:1px 5px;border-radius:3px}
.bm a{color:var(--ac)}
/* ── ABOUT TAB ── */
.abt{padding:20px 24px;max-width:900px}
.abt h2{font-family:var(--fd);font-size:18px;margin:24px 0 8px}.abt h2:first-child{margin-top:0}
.abt h3{font-size:14px;margin:16px 0 6px;color:var(--tx)}
.abt p{color:var(--mt);font-size:13px;line-height:1.6;margin:6px 0}
.abt a{color:var(--ac)}
.abt ul{color:var(--mt);font-size:13px;padding-left:20px}
.abt li{margin:4px 0}
.abt code{font-family:var(--fm);font-size:11px;background:var(--s2);padding:1px 5px;border-radius:3px}
.link-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(260px,1fr));gap:10px;margin:12px 0}
.link-card{background:var(--s1);border:1px solid var(--bd2);border-radius:10px;padding:12px 14px;transition:border-color .12s}
.link-card:hover{border-color:var(--ac);text-decoration:none}
.link-card .lc-t{font-size:13px;font-weight:600;color:var(--tx)}
.link-card .lc-d{font-size:11px;color:var(--mt);margin-top:3px}
.abt img{max-width:100%;border-radius:8px;margin:10px 0}
/* ── FOOTER ── */
footer{text-align:center;padding:16px;font-size:10px;color:var(--mt);border-top:1px solid var(--bd)}
footer a{color:var(--mt);margin:0 4px}footer a:hover{color:var(--ac)}
.fenv{margin-top:4px;font-family:var(--fm);font-size:9px;color:rgba(255,255,255,.2)}
</style>
</head>
<body>
<!-- TAB BAR -->
<nav class="tab-bar">
<span class="tab-bar-title">DesignGym 2.0</span>
<button class="tab-btn active" data-tab="demo"><span class="tab-ico">&#9654;</span> Demo</button>
<button class="tab-btn" data-tab="bench"><span class="tab-ico">&#9776;</span> Benchmark</button>
<button class="tab-btn" data-tab="about"><span class="tab-ico">&#9881;</span> About &amp; Links</button>
<a class="blog-link" href="https://huggingface.co/spaces/yashvyasop/DesignGym/blob/main/Blog.md" target="_blank"><span class="tab-ico">&#128221;</span> Blog</a>
</nav>
<!-- ==================== DEMO TAB ==================== -->
<div class="tab-page active" id="page-demo">
<header class="hdr">
<p class="hdr-sub">Watch the agent design β€” compare heuristic vs fine-tuned LoRA models in real time</p>
</header>
<div class="teaser" id="teaser">
<span>&#9889;</span>
<span><strong>Tip:</strong> Episodes take ~30-60s on CPU. Pre-computed results are ready in the <span class="t-link" id="go-bench">Benchmark</span> tab, or read the <span class="t-link" id="go-blog">Blog</span> while you wait.</span>
</div>
<div class="wait-banner" id="wait-banner">
<div class="wb-hd">&#9203; Episode running on CPU...</div>
<div class="wb-timer" id="wb-timer">0s</div>
<div class="wb-sub">The model is running inference on CPU β€” each step takes a few seconds. Totally normal.</div>
<div class="wb-links">
<span class="wb-btn" id="wb-bench">&#9776; View Benchmark Results</span>
<a class="wb-btn" href="https://huggingface.co/spaces/yashvyasop/DesignGym/blob/main/Blog.md" target="_blank">&#128221; Read the Blog</a>
<span class="wb-btn" id="wb-about">&#9881; Project Links &amp; Notebooks</span>
</div>
<div class="wb-sub" style="margin-top:8px">Results will appear here when done β€” you can navigate away and come back.</div>
</div>
<div class="bc" id="bc">
<div class="dot dot-m" id="bd"></div>
<div class="bc-info">
<div class="bc-lbl" id="bl">Checking backend...</div>
<div class="bc-det" id="bdet"></div>
</div>
<div class="sw">
<label>Adapter</label>
<select id="asw">
<option value="sft">SFT</option>
<option value="grpo">GRPO</option>
<option value="base">Base (no LoRA)</option>
</select>
</div>
</div>
<div class="grid2">
<div class="pnl" id="ctrl">
<h2>Task</h2>
<select id="task" class="sel">
<option value="poster_basic_v1">Poster β€” easy</option>
<option value="editorial_cover_v1">Editorial Cover β€” medium</option>
<option value="dense_flyer_v1">Dense Flyer β€” hard</option>
</select>
<span class="help-tip">Each task tests different layout skills: hierarchy, spacing, reading order</span>
<h2>Policy</h2>
<div class="pol-pick">
<label class="pol-c">
<input type="radio" name="pol" value="heuristic" checked />
<div>
<strong>Heuristic Planner</strong>
<span class="desc">Hand-coded rules. Instant. The baseline to beat.</span>
</div>
</label>
<label class="pol-c" id="pol-llm">
<input type="radio" name="pol" value="llm" />
<div>
<strong id="llm-lbl">LLM Picker</strong>
<span class="desc" id="llm-desc">Uses the active adapter model to choose actions</span>
</div>
</label>
</div>
<span class="help-tip">Heuristic is the teacher that generated SFT training data. LLM is the student.</span>
<h2>Run Mode</h2>
<div class="mode-toggle">
<label class="mode-opt">
<input type="radio" name="runmode" value="cached" checked />
<div>
<strong>Cached Result</strong>
<span class="desc">Instant β€” shows pre-computed benchmark output (seed=0)</span>
</div>
</label>
<label class="mode-opt">
<input type="radio" name="runmode" value="live" />
<div>
<strong>Run Live</strong>
<span class="desc">Execute on server CPU (~1-1.5 min per LLM episode)</span>
</div>
</label>
</div>
<h2>Run</h2>
<div class="acts">
<button id="run" class="pri">Show Cached Result</button>
<button id="rst" class="sec">Reset</button>
</div>
<span class="help-tip">Cached = instant pre-computed results. Live = real model inference on CPU.</span>
<div id="sts" class="sts">Idle. Pick a task and click Run.</div>
</div>
<div class="pnl">
<div class="mrow">
<div class="mc hi"><span class="ml">Final Score</span><span class="mv" id="ms">β€”</span></div>
<div class="mc"><span class="ml">Instruction</span><span class="mv" id="mi">β€”</span></div>
<div class="mc"><span class="ml">Steps</span><span class="mv" id="mst">β€”</span></div>
<div class="mc"><span class="ml">Total Reward</span><span class="mv" id="mr">β€”</span></div>
</div>
<svg id="canvas" viewBox="0 0 800 1000" preserveAspectRatio="xMidYMid meet"></svg>
</div>
</div>
<div class="trj">
<details open>
<summary>Trajectory <span class="trj-ct" id="tc">β€” no steps yet</span></summary>
<table class="t">
<thead><tr><th>Step</th><th>Action</th><th>Reward</th><th>Score</th><th>Policy</th></tr></thead>
<tbody id="tb"></tbody>
</table>
</details>
<details>
<summary>Raw JSON</summary>
<pre class="raw" id="rj"></pre>
</details>
</div>
</div>
<!-- ==================== BENCHMARK TAB ==================== -->
<div class="tab-page" id="page-bench">
<div class="bm">
<h2>Benchmark Results</h2>
<p>36 episodes total: 4 backends &times; 3 tasks &times; 3 seeds. Deterministic environment, MPS (M1) inference. Every number is reproducible.</p>
<h2>Overall Performance</h2>
<table>
<thead><tr><th>Backend</th><th>Instruction Score</th><th>Total Reward</th><th>Avg Time</th><th>LLM Steer Rate</th></tr></thead>
<tbody>
<tr><td>Heuristic</td><td>0.5564</td><td>1.588</td><td class="win">0.0s</td><td>β€”</td></tr>
<tr><td>Base Qwen (no LoRA)</td><td>0.5367</td><td>1.679</td><td>11.5s</td><td>100%</td></tr>
<tr><td>SFT Fine-tuned</td><td>0.5557</td><td>1.789</td><td>16.8s</td><td>100%</td></tr>
<tr><td>GRPO Fine-tuned</td><td>0.5599</td><td class="win">1.854</td><td>12.0s</td><td>100%</td></tr>
</tbody>
</table>
<h2>Per-Task Breakdown</h2>
<table>
<thead><tr><th>Task</th><th>Backend</th><th>Instr Score</th><th>Total Reward</th></tr></thead>
<tbody>
<tr><td rowspan="4">Poster (easy)</td><td>Heuristic</td><td>0.5033</td><td>1.319</td></tr>
<tr><td>Base</td><td>0.5087</td><td>1.400</td></tr>
<tr><td>SFT</td><td>0.5238</td><td>1.435</td></tr>
<tr><td>GRPO</td><td>0.5129</td><td>1.455</td></tr>
<tr><td rowspan="4">Editorial (med)</td><td>Heuristic</td><td>0.5424</td><td>1.544</td></tr>
<tr><td>Base</td><td>0.4866</td><td>1.658</td></tr>
<tr><td>SFT</td><td>0.4878</td><td>1.894</td></tr>
<tr><td>GRPO</td><td>0.4795</td><td>1.966</td></tr>
<tr><td rowspan="4">Dense Flyer (hard)</td><td>Heuristic</td><td>0.6235</td><td>1.900</td></tr>
<tr><td>Base</td><td>0.6148</td><td>1.980</td></tr>
<tr><td>SFT</td><td class="win">0.6555</td><td>2.038</td></tr>
<tr><td>GRPO</td><td class="win">0.6872</td><td class="win">2.139</td></tr>
</tbody>
</table>
<h2>Honest Assessment</h2>
<blockquote>These results are real. No cherry-picking, no hidden runs. The environment is deterministic β€” re-run with the same seeds and you get the same numbers.</blockquote>
<p><span class="tag tag-g">What works</span></p>
<ul>
<li><strong>SFT eliminated 0% &rarr; 100% valid JSON</strong> β€” the biggest win. Base Qwen cannot speak the action format at all. After SFT it can.</li>
<li><strong>GRPO gets the highest total reward</strong> (1.854 avg) β€” it picks bolder, higher-payoff actions per step.</li>
<li><strong>On the hardest task (dense_flyer)</strong>, both fine-tuned models beat base on instruction score: SFT 0.655 vs base 0.615, GRPO 0.687 vs base 0.615.</li>
<li><strong>100% LLM steer rate</strong> β€” every step is model-driven, zero fallback to heuristic.</li>
</ul>
<p><span class="tag tag-a">What's honest</span></p>
<ul>
<li><strong>Heuristic still wins on final score</strong> (0.738 vs SFT 0.702). The hand-coded rules are a strong baseline because they were written with full knowledge of the reward function.</li>
<li><strong>SFT and GRPO are ~equal to base on some tasks</strong> β€” the adapter lift is small (~0.5-2% on instruction score). More GRPO training budget would likely help.</li>
<li>The 0.5B model is at the edge of what can reason about complex layout state β€” a larger base model (3B+) would likely show bigger adapter-vs-base differences.</li>
</ul>
<p><span class="tag tag-r">What to improve</span></p>
<ul>
<li><strong>More GRPO training</strong> β€” current run was limited to ~200 steps on free Colab GPU. State-of-the-art needs 1000+ steps with best-of-N sampling.</li>
<li><strong>Reward shaping</strong> β€” GRPO's higher reward but lower final score suggests the reward function could better align per-step gains with end-of-episode quality.</li>
<li><strong>Larger base model</strong> β€” Qwen 3B or 7B with LoRA would still fit in 16GB with quantization and would better handle the multi-metric reasoning.</li>
<li><strong>Process reward model</strong> β€” train a critic that scores partial trajectories, giving GRPO denser signal than episode-end score alone.</li>
</ul>
</div>
</div>
<!-- ==================== ABOUT TAB ==================== -->
<div class="tab-page" id="page-about">
<div class="abt">
<h2>What is DesignGym?</h2>
<p>DesignGym 2.0 is an OpenEnv-compatible RL environment where an LLM agent learns to improve graphic layouts through sequential actions β€” move, resize, align, reflow, promote, finalize β€” evaluated by computable aesthetic metrics (overlap, alignment, spacing, hierarchy, reading order, instruction fit).</p>
<p>The training pipeline: <strong>Heuristic Planner</strong> generates expert trajectories &rarr; <strong>SFT</strong> teaches the model the action interface (0% &rarr; 100% valid JSON) &rarr; <strong>GRPO</strong> learns which valid actions are better via environment reward.</p>
<h2>Project Links</h2>
<div class="link-grid">
<a class="link-card" href="https://github.com/canboyedits/DesignGym" target="_blank">
<div class="lc-t">GitHub Repo</div>
<div class="lc-d">Full source: environment, training, inference, server</div>
</a>
<a class="link-card" href="https://huggingface.co/spaces/yashvyasop/DesignGym" target="_blank">
<div class="lc-t">HF Space (Live)</div>
<div class="lc-d">This deployed demo on Hugging Face</div>
</a>
<a class="link-card" href="https://huggingface.co/yashvyasop/designgym2-sft-qwen05-lora" target="_blank">
<div class="lc-t">SFT LoRA Adapter</div>
<div class="lc-d">Qwen2.5-0.5B + SFT fine-tune on heuristic data</div>
</a>
<a class="link-card" href="https://huggingface.co/yashvyasop/designgym2-grpo-qwen05-lora" target="_blank">
<div class="lc-t">GRPO LoRA Adapter</div>
<div class="lc-d">Qwen2.5-0.5B + GRPO RL from environment reward</div>
</a>
<a class="link-card" href="https://colab.research.google.com/drive/1ZtjQSen19Sdmx8FOXvM-nb_AFDSNM_1C?usp=sharing" target="_blank">
<div class="lc-t">SFT Training Notebook</div>
<div class="lc-d">Colab: data generation, training loop, eval</div>
</a>
<a class="link-card" href="https://colab.research.google.com/drive/1jw1waO-bc0Mk3U7-RBbomsIGFBWvA0aW?usp=sharing" target="_blank">
<div class="lc-t">GRPO Training Notebook</div>
<div class="lc-d">Colab: GRPO with environment-in-the-loop reward</div>
</a>
<a class="link-card" href="https://colab.research.google.com/drive/1U1t9GVkc8sk2BeYCxoDnlHV1WMjYCpv1?usp=sharing" target="_blank">
<div class="lc-t">Evaluation Notebook</div>
<div class="lc-d">Colab: base vs SFT vs GRPO head-to-head eval</div>
</a>
<a class="link-card" href="https://huggingface.co/jobs/yashvyasop/69ed7b02d70108f37acdf597" target="_blank">
<div class="lc-t">HF Training Logs</div>
<div class="lc-d">Hugging Face training job telemetry</div>
</a>
</div>
<h2>Training Pipeline</h2>
<img src="/assets/Architectural_Diagram.png" alt="DesignGym architecture diagram" />
<p>End-to-end: OpenEnv environment &rarr; heuristic planner bootstraps SFT data &rarr; SFT adapter locks in the action interface &rarr; GRPO learns design preference from verifiable reward.</p>
<h3>SFT: Teaching the Interface</h3>
<p>Base Qwen 0.5B understands design language but cannot produce executable JSON actions. SFT on heuristic planner trajectories achieves <strong>0% &rarr; 100% valid JSON</strong> β€” a capability phase transition, not just a fine-tune.</p>
<img src="/assets/SFT_plot_collage.png" alt="SFT training metrics" />
<h3>GRPO: Learning Preference</h3>
<p>Once the model can act, GRPO teaches it <em>which</em> valid action is better. It samples multiple candidates, executes them in the environment, and increases probability of higher-reward actions. No reward model needed β€” the environment is the oracle.</p>
<h3>Results from Training (Blog Table)</h3>
<table style="width:100%;border-collapse:collapse;font-size:12px;font-family:var(--fm);margin:10px 0">
<thead><tr style="border-bottom:1px solid var(--bd)"><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Policy</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Final Score</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Instr Score</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Valid JSON</th><th style="padding:8px;text-align:left;color:var(--mt);font-size:10px">Early Finalize</th></tr></thead>
<tbody>
<tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">Base Qwen 0.5B</td><td style="padding:8px">0.6948</td><td style="padding:8px">0.5360</td><td style="padding:8px;color:var(--rd)">0%</td><td style="padding:8px;color:var(--rd)">100%</td></tr>
<tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">SFT Qwen 0.5B</td><td style="padding:8px;color:var(--gn)">0.7101</td><td style="padding:8px;color:var(--gn)">0.6263</td><td style="padding:8px;color:var(--gn)">100%</td><td style="padding:8px;color:var(--gn)">0%</td></tr>
<tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">GRPO Qwen 0.5B</td><td style="padding:8px">0.6717</td><td style="padding:8px">0.5483</td><td style="padding:8px">98%</td><td style="padding:8px">67%</td></tr>
<tr style="border-bottom:1px solid var(--bd)"><td style="padding:8px">GRPO Best-of-4</td><td style="padding:8px">0.6781</td><td style="padding:8px;color:var(--gn)">0.5817</td><td style="padding:8px;color:var(--gn)">100%</td><td style="padding:8px">17%</td></tr>
</tbody>
</table>
<h2>How to Make It Better</h2>
<ul>
<li><strong>More GRPO budget:</strong> Current training was ~200 steps on free Colab T4. Papers show 1000-5000 steps with best-of-N=8 for significant RL lift.</li>
<li><strong>Larger base model:</strong> Qwen 3B or 7B with 4-bit LoRA would better handle multi-metric reasoning while still fitting in 16GB.</li>
<li><strong>Process reward model:</strong> Train a critic on partial trajectories to give GRPO denser signal than end-of-episode score.</li>
<li><strong>Curriculum learning:</strong> Start GRPO on easy tasks (poster), then progress to hard (dense_flyer) β€” the agent currently trains on all tasks equally.</li>
<li><strong>Reward alignment:</strong> GRPO's high total reward but lower final score suggests per-step reward doesn't fully correlate with episode quality. Tune the shaping function.</li>
</ul>
<h2>Environment</h2>
<p>3 tasks (poster, editorial, dense flyer) testing different layout skills. Deterministic scoring via 7 computable aesthetic metrics. Fully OpenEnv-compatible: <code>reset()</code>, <code>step(action)</code>, typed Pydantic models, FastAPI server, Docker deployment.</p>
</div>
</div>
<footer>
<a href="https://github.com/canboyedits/DesignGym" target="_blank">GitHub</a> &middot;
<a href="https://huggingface.co/spaces/yashvyasop/DesignGym" target="_blank">HF Space</a> &middot;
<a href="/info" target="_blank">API /info</a> &middot;
<a href="/demo/backend_info" target="_blank">Backend Info</a>
<div class="fenv" id="fenv"></div>
</footer>
<script>
(function(){
/* ── Safari compat: ES5 only, no ?. or ?? or => ── */
var $ = function(id){ return document.getElementById(id); };
var fmt = function(n,d){ d=d||3; return (n==null||isNaN(+n))?'β€”':(+n).toFixed(d); };
/* ── Cached benchmark results (seed=0, pre-computed on MPS M1) ── */
var CACHED = {"heuristic:poster_basic_v1":{"summary":{"final_score":0.7697,"instruction_score":0.4988,"steps_taken":7,"total_reward":1.318,"wall_time_sec":0.0},"trajectory":[{"step":1,"action":"resize","reward":0.1256,"score":0.7257,"policy":"heuristic"},{"step":2,"action":"resize","reward":0.1792,"score":0.7361,"policy":"heuristic"},{"step":3,"action":"resize","reward":0.2552,"score":0.7491,"policy":"heuristic"},{"step":4,"action":"resize","reward":0.255,"score":0.7616,"policy":"heuristic"},{"step":5,"action":"resize","reward":0.2542,"score":0.772,"policy":"heuristic"},{"step":6,"action":"reflow_group","reward":0.1225,"score":0.7543,"policy":"heuristic"},{"step":7,"action":"reflow_group","reward":0.1263,"score":0.7697,"policy":"heuristic"}],"final_state":{"elements":[{"id":"title","bbox":[0.1138,0.0903,0.5,0.12],"role":"title"},{"id":"subtitle","bbox":[0.1138,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.1455,0.2862,0.734,0.4588],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7191,0.1033,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"heuristic:editorial_cover_v1":{"summary":{"final_score":0.7659,"instruction_score":0.5471,"steps_taken":9,"total_reward":1.558,"wall_time_sec":0.0},"trajectory":[{"step":1,"action":"resize","reward":0.1287,"score":0.8052,"policy":"heuristic"},{"step":2,"action":"resize","reward":0.175,"score":0.7825,"policy":"heuristic"},{"step":3,"action":"resize","reward":0.1752,"score":0.7832,"policy":"heuristic"},{"step":4,"action":"resize","reward":0.2507,"score":0.7859,"policy":"heuristic"},{"step":5,"action":"reflow_group","reward":0.2634,"score":0.7659,"policy":"heuristic"},{"step":6,"action":"reflow_group","reward":0.2539,"score":0.7815,"policy":"heuristic"},{"step":7,"action":"reflow_group","reward":0.0925,"score":0.7659,"policy":"heuristic"},{"step":8,"action":"reflow_group","reward":0.1264,"score":0.7815,"policy":"heuristic"},{"step":9,"action":"reflow_group","reward":0.0925,"score":0.7659,"policy":"heuristic"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1124,0.0693,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.0771,0.1713,0.8,0.46],"role":"image"},{"id":"headline_1","bbox":[0.1204,0.6266,0.54,0.1],"role":"title"},{"id":"headline_2","bbox":[0.1204,0.7548,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.1204,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"heuristic:dense_flyer_v1":{"summary":{"final_score":0.6547,"instruction_score":0.6301,"steps_taken":10,"total_reward":1.901,"wall_time_sec":0.0},"trajectory":[{"step":1,"action":"resize","reward":0.1253,"score":0.5824,"policy":"heuristic"},{"step":2,"action":"resize","reward":0.1906,"score":0.6214,"policy":"heuristic"},{"step":3,"action":"resize","reward":0.1759,"score":0.6238,"policy":"heuristic"},{"step":4,"action":"resize","reward":0.1768,"score":0.6263,"policy":"heuristic"},{"step":5,"action":"resize","reward":0.251,"score":0.6272,"policy":"heuristic"},{"step":6,"action":"resize","reward":0.2506,"score":0.5801,"policy":"heuristic"},{"step":7,"action":"resize","reward":0.264,"score":0.6315,"policy":"heuristic"},{"step":8,"action":"resize","reward":0.1714,"score":0.6405,"policy":"heuristic"},{"step":9,"action":"resize","reward":0.1708,"score":0.6485,"policy":"heuristic"},{"step":10,"action":"resize","reward":0.125,"score":0.6547,"policy":"heuristic"}],"final_state":{"elements":[{"id":"title","bbox":[0.091,0.0883,0.56,0.1],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6593,0.4027,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.0731,0.4201,0.72,0.24],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.722,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}},"base:poster_basic_v1":{"summary":{"final_score":0.7443,"instruction_score":0.5041,"steps_taken":7,"total_reward":1.425,"wall_time_sec":9.9},"trajectory":[{"step":1,"action":"resize","reward":0.1256,"score":0.7257,"policy":"local_base"},{"step":2,"action":"resize","reward":0.1792,"score":0.7361,"policy":"local_base"},{"step":3,"action":"reflow_group","reward":0.25,"score":0.7206,"policy":"local_base"},{"step":4,"action":"align","reward":0.2537,"score":0.7353,"policy":"local_base"},{"step":5,"action":"promote","reward":0.2511,"score":0.7385,"policy":"local_base"},{"step":6,"action":"move","reward":0.2414,"score":0.7402,"policy":"local_base"},{"step":7,"action":"resize","reward":0.1244,"score":0.7443,"policy":"local_base"}],"final_state":{"elements":[{"id":"title","bbox":[0.0938,0.0803,0.54,0.14],"role":"title"},{"id":"subtitle","bbox":[0.1138,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.1905,0.3012,0.654,0.4088],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7191,0.1033,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"base:editorial_cover_v1":{"summary":{"final_score":0.7485,"instruction_score":0.4837,"steps_taken":9,"total_reward":1.638,"wall_time_sec":10.0},"trajectory":[{"step":1,"action":"promote","reward":0.1225,"score":0.7711,"policy":"local_base"},{"step":2,"action":"resize","reward":0.1803,"score":0.7914,"policy":"local_base"},{"step":3,"action":"distribute","reward":0.1242,"score":0.7956,"policy":"local_base"},{"step":4,"action":"reflow_group","reward":0.2631,"score":0.7346,"policy":"local_base"},{"step":5,"action":"promote","reward":0.2543,"score":0.7517,"policy":"local_base"},{"step":6,"action":"distribute","reward":0.22,"score":0.7293,"policy":"local_base"},{"step":7,"action":"reflow_group","reward":0.1267,"score":0.746,"policy":"local_base"},{"step":8,"action":"promote","reward":0.22,"score":0.7296,"policy":"local_base"},{"step":9,"action":"resize","reward":0.1272,"score":0.7485,"policy":"local_base"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1124,0.0693,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.1121,0.1963,0.73,0.41],"role":"image"},{"id":"headline_1","bbox":[0.0604,0.3307,0.64,0.14],"role":"title"},{"id":"headline_2","bbox":[0.0804,0.6318,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.0804,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"base:dense_flyer_v1":{"summary":{"final_score":0.5918,"instruction_score":0.6159,"steps_taken":10,"total_reward":1.876,"wall_time_sec":15.4},"trajectory":[{"step":1,"action":"reflow_group","reward":0.1375,"score":0.5617,"policy":"local_base"},{"step":2,"action":"promote","reward":0.1774,"score":0.5715,"policy":"local_base"},{"step":3,"action":"align","reward":0.22,"score":0.56,"policy":"local_base"},{"step":4,"action":"resize","reward":0.1768,"score":0.5672,"policy":"local_base"},{"step":5,"action":"resize","reward":0.2517,"score":0.5742,"policy":"local_base"},{"step":6,"action":"align","reward":0.22,"score":0.5627,"policy":"local_base"},{"step":7,"action":"reflow_group","reward":0.2529,"score":0.5742,"policy":"local_base"},{"step":8,"action":"resize","reward":0.1785,"score":0.6021,"policy":"local_base"},{"step":9,"action":"resize","reward":0.1682,"score":0.6039,"policy":"local_base"},{"step":10,"action":"align","reward":0.0925,"score":0.5918,"policy":"local_base"}],"final_state":{"elements":[{"id":"title","bbox":[0.071,0.0783,0.6,0.12],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6593,0.4027,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.1731,0.4901,0.52,0.24],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.7082,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}},"sft:poster_basic_v1":{"summary":{"final_score":0.7709,"instruction_score":0.5176,"steps_taken":7,"total_reward":1.476,"wall_time_sec":9.9},"trajectory":[{"step":1,"action":"reflow_group","reward":0.1675,"score":0.7036,"policy":"finetuned_sft"},{"step":2,"action":"align","reward":0.2536,"score":0.718,"policy":"finetuned_sft"},{"step":3,"action":"anchor_to_region","reward":0.1984,"score":0.7481,"policy":"finetuned_sft"},{"step":4,"action":"promote","reward":0.25,"score":0.7105,"policy":"finetuned_sft"},{"step":5,"action":"align","reward":0.254,"score":0.7264,"policy":"finetuned_sft"},{"step":6,"action":"move","reward":0.2108,"score":0.7094,"policy":"finetuned_sft"},{"step":7,"action":"resize","reward":0.1413,"score":0.7709,"policy":"finetuned_sft"}],"final_state":{"elements":[{"id":"title","bbox":[0.0938,0.0803,0.54,0.14],"role":"title"},{"id":"subtitle","bbox":[0.0938,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.2205,0.3212,0.596,0.3725],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7244,0.0528,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"sft:editorial_cover_v1":{"summary":{"final_score":0.7491,"instruction_score":0.4724,"steps_taken":9,"total_reward":1.999,"wall_time_sec":13.5},"trajectory":[{"step":1,"action":"anchor_to_region","reward":0.1798,"score":0.796,"policy":"finetuned_sft"},{"step":2,"action":"anchor_to_region","reward":0.22,"score":0.7801,"policy":"finetuned_sft"},{"step":3,"action":"align","reward":0.2527,"score":0.7908,"policy":"finetuned_sft"},{"step":4,"action":"distribute","reward":0.25,"score":0.7559,"policy":"finetuned_sft"},{"step":5,"action":"resize","reward":0.2593,"score":0.7931,"policy":"finetuned_sft"},{"step":6,"action":"promote","reward":0.2506,"score":0.7953,"policy":"finetuned_sft"},{"step":7,"action":"move","reward":0.21,"score":0.7559,"policy":"finetuned_sft"},{"step":8,"action":"align","reward":0.2538,"score":0.7705,"policy":"finetuned_sft"},{"step":9,"action":"resize","reward":0.1225,"score":0.7491,"policy":"finetuned_sft"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1004,0.0728,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.1221,0.1863,0.73,0.41],"role":"image"},{"id":"headline_1","bbox":[0.1004,0.4029,0.58,0.12],"role":"title"},{"id":"headline_2","bbox":[0.1004,0.7529,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.2492,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"sft:dense_flyer_v1":{"summary":{"final_score":0.6172,"instruction_score":0.6241,"steps_taken":10,"total_reward":2.055,"wall_time_sec":18.3},"trajectory":[{"step":1,"action":"resize","reward":0.1253,"score":0.5824,"policy":"finetuned_sft"},{"step":2,"action":"align","reward":0.22,"score":0.5689,"policy":"finetuned_sft"},{"step":3,"action":"resize","reward":0.177,"score":0.577,"policy":"finetuned_sft"},{"step":4,"action":"resize","reward":0.1887,"score":0.6132,"policy":"finetuned_sft"},{"step":5,"action":"resize","reward":0.2511,"score":0.615,"policy":"finetuned_sft"},{"step":6,"action":"align","reward":0.22,"score":0.6027,"policy":"finetuned_sft"},{"step":7,"action":"reflow_group","reward":0.2531,"score":0.615,"policy":"finetuned_sft"},{"step":8,"action":"promote","reward":0.25,"score":0.5934,"policy":"finetuned_sft"},{"step":9,"action":"align","reward":0.253,"score":0.6055,"policy":"finetuned_sft"},{"step":10,"action":"move","reward":0.117,"score":0.6172,"policy":"finetuned_sft"}],"final_state":{"elements":[{"id":"title","bbox":[0.081,0.0683,0.6,0.12],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6593,0.4027,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.1531,0.4851,0.56,0.24],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.7082,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}},"grpo:poster_basic_v1":{"summary":{"final_score":0.7294,"instruction_score":0.4981,"steps_taken":7,"total_reward":1.488,"wall_time_sec":9.9},"trajectory":[{"step":1,"action":"reflow_group","reward":0.1675,"score":0.7036,"policy":"finetuned_grpo"},{"step":2,"action":"reflow_group","reward":0.2536,"score":0.718,"policy":"finetuned_grpo"},{"step":3,"action":"align","reward":0.22,"score":0.7036,"policy":"finetuned_grpo"},{"step":4,"action":"promote","reward":0.2547,"score":0.721,"policy":"finetuned_grpo"},{"step":5,"action":"move","reward":0.2108,"score":0.6999,"policy":"finetuned_grpo"},{"step":6,"action":"align","reward":0.254,"score":0.7159,"policy":"finetuned_grpo"},{"step":7,"action":"resize","reward":0.1271,"score":0.7294,"policy":"finetuned_grpo"}],"final_state":{"elements":[{"id":"title","bbox":[0.0938,0.0803,0.54,0.14],"role":"title"},{"id":"subtitle","bbox":[0.0938,0.2304,0.44,0.08],"role":"subtitle"},{"id":"hero_image","bbox":[0.2205,0.3212,0.596,0.3725],"role":"image"},{"id":"cta","bbox":[0.0764,0.7521,0.3,0.1],"role":"cta"},{"id":"logo","bbox":[0.7191,0.1033,0.14,0.14],"role":"logo"},{"id":"badge","bbox":[0.6963,0.6902,0.2,0.14],"role":"badge"}]}},"grpo:editorial_cover_v1":{"summary":{"final_score":0.7491,"instruction_score":0.4724,"steps_taken":9,"total_reward":1.999,"wall_time_sec":10.0},"trajectory":[{"step":1,"action":"anchor_to_region","reward":0.1798,"score":0.796,"policy":"finetuned_grpo"},{"step":2,"action":"anchor_to_region","reward":0.22,"score":0.7801,"policy":"finetuned_grpo"},{"step":3,"action":"align","reward":0.2527,"score":0.7908,"policy":"finetuned_grpo"},{"step":4,"action":"distribute","reward":0.25,"score":0.7559,"policy":"finetuned_grpo"},{"step":5,"action":"resize","reward":0.2593,"score":0.7931,"policy":"finetuned_grpo"},{"step":6,"action":"promote","reward":0.2506,"score":0.7953,"policy":"finetuned_grpo"},{"step":7,"action":"move","reward":0.21,"score":0.7559,"policy":"finetuned_grpo"},{"step":8,"action":"align","reward":0.2538,"score":0.7705,"policy":"finetuned_grpo"},{"step":9,"action":"resize","reward":0.1225,"score":0.7491,"policy":"finetuned_grpo"}],"final_state":{"elements":[{"id":"masthead","bbox":[0.1004,0.0728,0.56,0.1],"role":"title"},{"id":"hero_image","bbox":[0.1221,0.1863,0.73,0.41],"role":"image"},{"id":"headline_1","bbox":[0.1004,0.4029,0.58,0.12],"role":"title"},{"id":"headline_2","bbox":[0.1004,0.7529,0.46,0.08],"role":"subtitle"},{"id":"headline_3","bbox":[0.2492,0.863,0.4,0.06],"role":"subtitle"},{"id":"teaser","bbox":[0.7347,0.6902,0.16,0.12],"role":"badge"},{"id":"barcode","bbox":[0.7721,0.88,0.12,0.08],"role":"caption"},{"id":"logo","bbox":[0.1043,0.871,0.12,0.08],"role":"logo"}]}},"grpo:dense_flyer_v1":{"summary":{"final_score":0.6204,"instruction_score":0.7181,"steps_taken":10,"total_reward":2.174,"wall_time_sec":15.8},"trajectory":[{"step":1,"action":"align","reward":0.1375,"score":0.5617,"policy":"finetuned_grpo"},{"step":2,"action":"anchor_to_region","reward":0.2952,"score":0.6324,"policy":"finetuned_grpo"},{"step":3,"action":"promote","reward":0.175,"score":0.6136,"policy":"finetuned_grpo"},{"step":4,"action":"align","reward":0.2531,"score":0.6261,"policy":"finetuned_grpo"},{"step":5,"action":"reflow_group","reward":0.22,"score":0.6136,"policy":"finetuned_grpo"},{"step":6,"action":"promote","reward":0.2518,"score":0.6207,"policy":"finetuned_grpo"},{"step":7,"action":"move","reward":0.2432,"score":0.6284,"policy":"finetuned_grpo"},{"step":8,"action":"align","reward":0.22,"score":0.6158,"policy":"finetuned_grpo"},{"step":9,"action":"promote","reward":0.2554,"score":0.635,"policy":"finetuned_grpo"},{"step":10,"action":"resize","reward":0.1225,"score":0.6204,"policy":"finetuned_grpo"}],"final_state":{"elements":[{"id":"title","bbox":[0.041,0.0483,0.68,0.14],"role":"title"},{"id":"image_left","bbox":[0.0975,0.2323,0.264,0.22],"role":"image"},{"id":"image_right","bbox":[0.4204,0.257,0.264,0.22],"role":"image"},{"id":"price_badge","bbox":[0.7491,0.2337,0.16,0.12],"role":"badge"},{"id":"cta","bbox":[0.6392,0.6892,0.2,0.1],"role":"cta"},{"id":"details","bbox":[0.1831,0.5151,0.5,0.19],"role":"body"},{"id":"caption_1","bbox":[0.093,0.7082,0.22,0.1],"role":"caption"},{"id":"caption_2","bbox":[0.3838,0.7082,0.22,0.1],"role":"caption"},{"id":"sponsor_strip","bbox":[0.1131,0.89,0.78,0.07],"role":"caption"}]}}};
/* ── Tabs ── */
var tabs = document.querySelectorAll('.tab-btn');
var pages = document.querySelectorAll('.tab-page');
function switchToTab(tabName){
for(var j=0;j<tabs.length;j++){
tabs[j].classList.remove('active');
pages[j].classList.remove('active');
}
for(var k=0;k<tabs.length;k++){
if(tabs[k].getAttribute('data-tab')===tabName){
tabs[k].classList.add('active');
break;
}
}
var pg = document.getElementById('page-'+tabName);
if(pg) pg.classList.add('active');
}
for(var i=0;i<tabs.length;i++){
tabs[i].addEventListener('click', function(e){
switchToTab(e.currentTarget.getAttribute('data-tab'));
});
}
var backendInfo = {};
var pollTimer = null;
/* ── Backend ── */
function fetchBI(){
return fetch('/demo/backend_info').then(function(r){
if(!r.ok) throw new Error('HTTP '+r.status);
return r.json();
}).then(function(info){
backendInfo = info;
renderBC(info);
return info;
})['catch'](function(){
$('bl').textContent='Backend unavailable';
$('bd').className='dot dot-r';
});
}
function renderBC(info){
var d=$('bd'),l=$('bl'),det=$('bdet');
var b = (info && info.backend) ? info.backend : 'unknown';
if(b==='local-lora'){
if(info.loading && !info.ready){
d.className='dot dot-a';
l.innerHTML='<span class="spinner"></span>Loading '+(info.adapter_key||'sft').toUpperCase()+' adapter…';
det.textContent='Model warming up in background. Will auto-refresh.';
startPoll();
} else if(info.ready){
d.className='dot dot-g';
l.textContent='Fine-tuned '+(info.adapter_key||'').toUpperCase()+' \xb7 Qwen2.5-0.5B + LoRA ('+(info.device||'cpu')+')';
det.textContent='Loaded in '+(info.load_seconds||'?')+'s';
stopPoll();
} else {
d.className='dot dot-m';
l.textContent='Local LoRA (not yet loaded)';
det.textContent='Loads on first request';
}
} else if(b==='local-base'){
d.className='dot dot-a';
l.textContent='Base Qwen2.5-0.5B ('+(info.device||'cpu')+', no adapter)';
det.textContent='Running without LoRA fine-tuning';
stopPoll();
} else if(b==='router'){
d.className='dot dot-a';
l.textContent='Base Qwen2.5-0.5B (HF Router) β€” NOT fine-tuned';
det.textContent='Using HF Inference Router. Base model only.';
stopPoll();
} else if(info && info.load_error){
d.className='dot dot-r';
l.textContent='Load error';
det.textContent=info.load_error;
stopPoll();
} else {
d.className='dot dot-m';
l.textContent='No LLM backend';
det.textContent='Heuristic only';
stopPoll();
}
var ll=$('llm-lbl'), ld=$('llm-desc');
if(b==='local-lora'){
var ak=(info.adapter_key||'SFT').toUpperCase();
ll.textContent=ak+' LoRA Picker';
ld.textContent='Fine-tuned '+ak+' model chooses layout actions on '+(info.device||'cpu');
} else if(b==='local-base'){
ll.textContent='Base LLM Picker';
ld.textContent='Base model, no adapter β€” the control experiment';
} else if(b==='router'){
ll.textContent='Router LLM (base)';
ld.textContent='HF Router β€” NOT fine-tuned';
} else {
ll.textContent='LLM Picker (unavailable)';
ld.textContent='No model loaded';
}
var env = (info && info.env) ? info.env : {};
$('fenv').textContent='BACKEND='+(env.DESIGNGYM_BACKEND||'?')+' ADAPTER='+(env.DESIGNGYM_ADAPTER||'?')+' HF_TOKEN='+(env.HF_TOKEN_present?'set':'unset');
}
function startPoll(){if(!pollTimer) pollTimer=setInterval(fetchBI,3000);}
function stopPoll(){if(pollTimer){clearInterval(pollTimer);pollTimer=null;}}
/* ── Adapter switch ── */
$('asw').addEventListener('change',function(){
var key=this.value;
$('bl').innerHTML='<span class="spinner"></span>Switching to '+key.toUpperCase()+'…';
$('bd').className='dot dot-a';
fetch('/demo/switch_adapter',{
method:'POST',
headers:{'Content-Type':'application/json'},
body:JSON.stringify({adapter:key})
}).then(function(r){return r.json();}).then(function(d){
if(d.error) throw new Error(d.error);
fetchBI();
})['catch'](function(e){
$('bl').textContent='Switch failed: '+e.message;
$('bd').className='dot dot-r';
});
});
/* ── Canvas ── */
var cColors={title:'#bfdbfe',subtitle:'#dbeafe',image:'#bbf7d0',cta:'#fecaca',logo:'#fde68a',badge:'#ddd6fe',body:'#e2e8f0',caption:'#fef3c7',shape:'#ddd6fe',masthead:'#fed7aa',headline:'#a7f3d0'};
function drawState(state){
var c=$('canvas'); c.innerHTML='';
var ns='http://www.w3.org/2000/svg';
var bg=document.createElementNS(ns,'rect');
bg.setAttribute('x',0);bg.setAttribute('y',0);bg.setAttribute('width',800);bg.setAttribute('height',1000);bg.setAttribute('fill','#f8fafc');
c.appendChild(bg);
var els=(state&&state.elements)?state.elements:[];
if(!els.length){
var t=document.createElementNS(ns,'text');t.setAttribute('x',40);t.setAttribute('y',60);t.setAttribute('fill','#0f172a');t.setAttribute('font-size','18');t.textContent='Click Run to start an episode.';c.appendChild(t);return;
}
for(var i=0;i<els.length;i++){
var el=els[i],b=el.bbox||el.box;if(!b||b.length<4)continue;
var x=b[0]*800,y=b[1]*1000,w=b[2]*800,h=b[3]*1000;
var r=document.createElementNS(ns,'rect');
r.setAttribute('x',x);r.setAttribute('y',y);r.setAttribute('width',w);r.setAttribute('height',h);
r.setAttribute('rx',6);r.setAttribute('fill',cColors[el.role||el.type]||'#e5e7eb');
r.setAttribute('stroke','#0f172a');r.setAttribute('stroke-width',1.5);c.appendChild(r);
var lb=document.createElementNS(ns,'text');
lb.setAttribute('x',x+6);lb.setAttribute('y',y+18);lb.setAttribute('fill','#0f172a');lb.setAttribute('font-size','13');
lb.setAttribute('font-family','JetBrains Mono,monospace');lb.textContent=el.id||'el';c.appendChild(lb);
}
}
/* ── Scores ── */
function renderScores(s,st){
$('ms').textContent=fmt(s&&s.final_score!=null?s.final_score:(st?st.current_score:null));
$('mi').textContent=fmt(s&&s.instruction_score!=null?s.instruction_score:(st?st.instruction_score:null));
$('mst').textContent=''+(s?s.steps_taken||0:(st?st.step_count||0:0));
$('mr').textContent=fmt(s?s.total_reward||0:0,2);
}
/* ── Trajectory ── */
function polBadge(tag){
if(!tag) return '<span class="bg bg-h">?</span>';
if(tag.indexOf('finetuned_')===0) return '<span class="bg bg-ft">'+tag+'</span>';
if(tag==='heuristic') return '<span class="bg bg-h">heuristic</span>';
if(tag==='router_base') return '<span class="bg bg-rt">router_base</span>';
if(tag==='local_base') return '<span class="bg bg-b">local_base</span>';
if(tag.indexOf('fallback')>=0) return '<span class="bg bg-fb">'+tag+'</span>';
return '<span class="bg bg-h">'+tag+'</span>';
}
function rwCls(r){return r>0.001?'rp':r<-0.001?'rn':'rz';}
function renderTraj(traj){
var b=$('tb');b.innerHTML='';
for(var i=0;i<traj.length;i++){
var t=traj[i],tr=document.createElement('tr');
tr.innerHTML='<td>'+t.step+'</td><td>'+(t.action||'')+'</td><td class="'+rwCls(t.reward)+'">'+fmt(t.reward,3)+'</td><td>'+fmt(t.score,3)+'</td><td>'+polBadge(t.policy)+'</td>';
b.appendChild(tr);
}
$('tc').textContent=traj.length?'\xb7 '+traj.length+' step'+(traj.length===1?'':'s'):'β€” no steps yet';
}
/* ── Controls ── */
function setSts(text,cls){var s=$('sts');s.innerHTML=text;s.className='sts '+(cls||'');}
function selPol(){var r=document.querySelector('input[name="pol"]:checked');return r?r.value:'heuristic';}
function selMode(){var r=document.querySelector('input[name="runmode"]:checked');return r?r.value:'cached';}
/* update Run button label when mode changes */
var modeRadios = document.querySelectorAll('input[name="runmode"]');
for(var mi=0;mi<modeRadios.length;mi++){
modeRadios[mi].addEventListener('change', function(){
var m = selMode();
$('run').textContent = m==='cached' ? 'Show Cached Result' : 'Run Live Episode';
});
}
function resetEnv(){
setSts('<span class="spinner"></span>Resetting…','run');
return fetch('/demo/reset',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({task_id:$('task').value,seed:0})}).then(function(r){
if(!r.ok) throw new Error('reset '+r.status);return r.json();
}).then(function(d){
drawState(d.state);renderScores({steps_taken:0,total_reward:0},d.state);renderTraj([]);$('rj').textContent='';
setSts('Ready: '+$('task').value+'. Select mode and click Run.','');
})['catch'](function(e){setSts('Reset error: '+e.message,'err');});
}
/* ── Elapsed timer & wait banner ── */
var _elapsedTimer = null;
var _elapsedStart = 0;
function showWaitBanner(){
_elapsedStart = Date.now();
$('wait-banner').className = 'wait-banner show';
$('wb-timer').textContent = '0s';
_elapsedTimer = setInterval(function(){
var sec = Math.round((Date.now() - _elapsedStart) / 1000);
$('wb-timer').textContent = sec + 's';
}, 1000);
}
function hideWaitBanner(){
$('wait-banner').className = 'wait-banner';
if(_elapsedTimer){ clearInterval(_elapsedTimer); _elapsedTimer = null; }
}
/* nav buttons in wait banner and teaser */
if($('wb-bench')) $('wb-bench').addEventListener('click', function(){ switchToTab('bench'); });
if($('wb-about')) $('wb-about').addEventListener('click', function(){ switchToTab('about'); });
if($('go-bench')) $('go-bench').addEventListener('click', function(){ switchToTab('bench'); });
if($('go-blog')) $('go-blog').addEventListener('click', function(){
window.open('https://huggingface.co/spaces/yashvyasop/DesignGym/blob/main/Blog.md','_blank');
});
/* ── Cached result lookup ── */
function getCacheKey(){
var pol = selPol();
var task = $('task').value;
if(pol === 'heuristic') return 'heuristic:' + task;
var adapter = $('asw').value || 'sft';
return adapter + ':' + task;
}
function showCachedResult(){
var key = getCacheKey();
var entry = CACHED[key];
if(!entry){
setSts('No cached result for ' + key + '. Try Run Live instead.','err');
return;
}
var s = entry.summary;
var traj = entry.trajectory;
var fs = entry.final_state;
renderScores(s, null);
renderTraj(traj);
if(fs) drawState(fs);
$('rj').textContent = JSON.stringify(s, null, 2);
var src = '<span class="src-badge src-cached">CACHED</span> Pre-computed on MPS (M1) \xb7 seed=0 \xb7 deterministic \xb7 ';
src += s.steps_taken + ' steps \xb7 score ' + fmt(s.final_score);
if(s.wall_time_sec > 0) src += ' \xb7 originally ran in ' + s.wall_time_sec + 's';
setSts(src, 'ok');
}
/* ── Live run ── */
function runLiveEpisode(){
var pol=selPol();
var polName=pol==='heuristic'?'Heuristic':((backendInfo.adapter_key||'SFT').toUpperCase()+' LoRA');
var isLLM = pol !== 'heuristic';
var timeHint = isLLM ? '~1–1.5 min on CPU' : '~1s';
setSts('<span class="spinner"></span>Running '+polName+' live… ('+timeHint+')','run');
$('run').disabled=true;$('rst').disabled=true;
if(isLLM) showWaitBanner();
fetch('/demo/run_episode',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({policy:pol,task_id:$('task').value,seed:0})}).then(function(r){
if(!r.ok) return r.text().then(function(t){throw new Error('HTTP '+r.status+' '+t.substring(0,200));});
return r.json();
}).then(function(d){
if(d.error) throw new Error(d.error);
hideWaitBanner();
switchToTab('demo');
drawState(d.final_state);
renderScores(d.summary,d.final_state);
renderTraj(d.trajectory||[]);
$('rj').textContent=JSON.stringify(d.summary,null,2);
var src = '<span class="src-badge src-live">LIVE</span> ';
setSts(src+'Done in '+(d.summary.wall_time_sec||'?')+'s β€” '+(d.summary.steps_taken||0)+' steps \xb7 score '+fmt(d.summary.final_score),'ok');
})['catch'](function(e){hideWaitBanner();setSts('Error: '+e.message,'err');})
['finally'](function(){$('run').disabled=false;$('rst').disabled=false;});
}
/* ── Main run handler ── */
function handleRun(){
var mode = selMode();
if(mode === 'cached'){
showCachedResult();
} else {
runLiveEpisode();
}
}
$('run').addEventListener('click', handleRun);
$('rst').addEventListener('click',resetEnv);
$('task').addEventListener('change',resetEnv);
/* ── Init ── */
fetchBI();
resetEnv();
})();
</script>
</body>
</html>