SwapnilPatil28 committed on
Commit
6883897
·
verified ·
1 Parent(s): 26f0690

Upload 13 files

Browse files
Files changed (1) hide show
  1. server/app.py +51 -24
server/app.py CHANGED
@@ -223,25 +223,45 @@ def _dashboard_html() -> str:
223
  <h2>Training evidence</h2>
224
  <p class='sub'>
225
  Committed artifacts from the reference training run
226
- (Qwen2.5-1.5B-Instruct, 8 episodes/task, 3 epochs).
 
227
  </p>
228
  <div class='plots'>
229
  <figure>
230
- <img src='/artifacts/reward_curve.png' alt='Reward curve by policy' loading='lazy' />
231
- <figcaption>Mean episodic reward per task tier across Random / Heuristic /
232
- Base-LLM / SFT-LLM. SFT matches the heuristic demonstrator across every tier
233
- and outperforms the untuned base by <strong>+{hard}</strong> on hard incidents.</figcaption>
 
 
 
234
  </figure>
235
  <figure>
236
- <img src='/artifacts/training_curve.png' alt='SFT training loss and token accuracy' loading='lazy' />
237
- <figcaption>Supervised loss collapses from <code>~2.84 ~0.02</code> and
238
- next-token accuracy climbs from <code>~0.49 → ~0.99</code> in three epochs on 680 rollout tokens.</figcaption>
 
 
 
239
  </figure>
240
  <figure>
241
- <img src='/artifacts/reward_components.png' alt='Reward component decomposition' loading='lazy' />
242
- <figcaption>Per-component reward decomposition. SFT reproduces the
243
- heuristic's positive components (clue_bonus, mitigation_correct, closure_correct,
244
- speed_bonus) while the base model stalls on step_cost and SLA penalties.</figcaption>
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  </figure>
246
  </div>
247
  <p class='sub' style='margin-top:0.75rem'>
@@ -250,9 +270,7 @@ def _dashboard_html() -> str:
250
  ·
251
  <a href='/artifacts/training_log.json'>training_log.json</a>
252
  ·
253
- <a href='/artifacts/reward_curve_qwen0p5b.png'>0.5B ablation plot</a>
254
- ·
255
- <a href='/artifacts/summary_metrics_qwen0p5b.json'>0.5B metrics</a>
256
  </p>
257
  """.format(hard=_fmt(headline_delta))
258
  else:
@@ -358,19 +376,19 @@ def _dashboard_html() -> str:
358
  background: radial-gradient(1000px 600px at 10% -10%, #1e293b, var(--bg));
359
  color: var(--text); padding: 2rem; margin: 0; min-height: 100vh;
360
  }}
361
- header {{ display:flex; align-items:center; justify-content:space-between; max-width:1100px; margin:0 auto 1.5rem; flex-wrap:wrap; gap:1rem; }}
362
  .brand {{ display:flex; align-items:center; gap:0.75rem; }}
363
  .logo {{ width:44px; height:44px; border-radius:10px; background:linear-gradient(135deg,var(--primary),var(--accent)); }}
364
  h1 {{ font-size:1.6rem; margin:0; }}
365
- h2 {{ font-size:1.2rem; margin:1.8rem 0 0.6rem; color:#cbd5e1; }}
366
  .sub {{ color: var(--muted); }}
367
- .grid {{ display:grid; grid-template-columns: repeat(auto-fit,minmax(240px,1fr)); gap:1rem; max-width:1100px; margin:0 auto; }}
368
  .grid-3 {{ grid-template-columns: repeat(auto-fit,minmax(280px,1fr)); }}
369
  .card {{ background: var(--card); border: 1px solid #1f2a44; padding: 1.25rem; border-radius: 14px; }}
370
  .card h3 {{ margin:0 0 0.5rem; font-size:1rem; color:#f1f5f9; }}
371
  .pill {{ display:inline-block; padding:2px 8px; margin:2px; border-radius:999px; background:#1e293b; border:1px solid #334155; color:#cbd5e1; font-size:0.78rem; }}
372
  .pill.cta {{ background:linear-gradient(135deg,var(--primary),var(--accent)); color:#0b1225; border-color:transparent; font-weight:600; }}
373
- .container {{ max-width: 1100px; margin: 0 auto; }}
374
  code {{ background:#0b1225; border:1px solid #1f2a44; padding:2px 6px; border-radius:6px; color:#67e8f9; font-family:'JetBrains Mono', monospace; }}
375
  pre {{ background:#0b1225; border:1px solid #1f2a44; padding: 1rem; border-radius: 10px; color:#cbd5e1; overflow-x:auto; font-size:0.85rem; }}
376
  a {{ color: var(--accent); text-decoration: none; }}
@@ -379,11 +397,20 @@ def _dashboard_html() -> str:
379
  .kpi .num {{ font-size:1.6rem; font-weight:700; color:#f8fafc; }}
380
  .kpi .lbl {{ color: var(--muted); font-size:0.8rem; }}
381
  .kpi .num.good {{ color: var(--good); }}
382
- footer {{ max-width:1100px; margin:2rem auto 0; color:var(--muted); font-size:0.85rem; }}
383
- .plots {{ display:grid; grid-template-columns: repeat(auto-fit,minmax(300px,1fr)); gap:1rem; max-width:1100px; margin:0 auto; }}
384
- .plots figure {{ background: var(--card); border:1px solid #1f2a44; border-radius: 14px; padding: 0.75rem; margin:0; }}
385
- .plots img {{ width:100%; height:auto; border-radius:8px; background:#0b1225; }}
386
- .plots figcaption {{ color: var(--muted); font-size:0.8rem; margin-top:0.5rem; line-height:1.4; }}
 
 
 
 
 
 
 
 
 
387
  .table-wrap {{ overflow-x:auto; }}
388
  table {{ width:100%; border-collapse: collapse; margin-top:0.5rem; font-size:0.9rem; }}
389
  th, td {{ padding:0.5rem 0.75rem; text-align:left; border-bottom:1px solid #1f2a44; }}
 
223
  <h2>Training evidence</h2>
224
  <p class='sub'>
225
  Committed artifacts from the reference training run
226
+ (Qwen2.5-1.5B-Instruct, 8 episodes/task, 3 epochs) plus the
227
+ Qwen2.5-0.5B-Instruct ablation. Click any plot to open it full-size.
228
  </p>
229
  <div class='plots'>
230
  <figure>
231
+ <a href='/artifacts/reward_curve.png' target='_blank' rel='noopener'>
232
+ <img src='/artifacts/reward_curve.png' alt='Reward curve by policy (1.5B)' loading='lazy' />
233
+ </a>
234
+ <figcaption><strong>1.5B reward curve.</strong> Mean episodic reward per task tier
235
+ across Random / Heuristic / Base-LLM / SFT-LLM. SFT matches the heuristic
236
+ demonstrator across every tier and outperforms the untuned base by
237
+ <strong>+{hard}</strong> on hard incidents.</figcaption>
238
  </figure>
239
  <figure>
240
+ <a href='/artifacts/training_curve.png' target='_blank' rel='noopener'>
241
+ <img src='/artifacts/training_curve.png' alt='SFT training loss and token accuracy (1.5B)' loading='lazy' />
242
+ </a>
243
+ <figcaption><strong>1.5B training curve.</strong> Supervised loss collapses from
244
+ <code>~2.84 → ~0.02</code> and next-token accuracy climbs from
245
+ <code>~0.49 → ~0.99</code> over three epochs on 680 rollout tokens.</figcaption>
246
  </figure>
247
  <figure>
248
+ <a href='/artifacts/reward_components.png' target='_blank' rel='noopener'>
249
+ <img src='/artifacts/reward_components.png' alt='Reward component decomposition (1.5B)' loading='lazy' />
250
+ </a>
251
+ <figcaption><strong>1.5B reward-component breakdown.</strong> SFT reproduces the
252
+ heuristic's positive components (<code>clue_bonus</code>,
253
+ <code>mitigation_correct</code>, <code>closure_correct</code>,
254
+ <code>speed_bonus</code>) while the base model stalls on
255
+ <code>step_cost</code> and SLA penalties.</figcaption>
256
+ </figure>
257
+ <figure>
258
+ <a href='/artifacts/reward_curve_qwen0p5b.png' target='_blank' rel='noopener'>
259
+ <img src='/artifacts/reward_curve_qwen0p5b.png' alt='Reward curve by policy (0.5B ablation)' loading='lazy' />
260
+ </a>
261
+ <figcaption><strong>0.5B ablation reward curve.</strong> Same pipeline, smaller
262
+ backbone. SFT improves by only <strong>+0.43 / +0.14 / +0.00</strong> over base —
263
+ the 0.5B model is too small to absorb the multi-step, role-gated policy.
264
+ Scale is the story.</figcaption>
265
  </figure>
266
  </div>
267
  <p class='sub' style='margin-top:0.75rem'>
 
270
  ·
271
  <a href='/artifacts/training_log.json'>training_log.json</a>
272
  ·
273
+ <a href='/artifacts/summary_metrics_qwen0p5b.json'>summary_metrics_qwen0p5b.json</a>
 
 
274
  </p>
275
  """.format(hard=_fmt(headline_delta))
276
  else:
 
376
  background: radial-gradient(1000px 600px at 10% -10%, #1e293b, var(--bg));
377
  color: var(--text); padding: 2rem; margin: 0; min-height: 100vh;
378
  }}
379
+ header {{ display:flex; align-items:center; justify-content:space-between; max-width:1200px; margin:0 auto 1.5rem; flex-wrap:wrap; gap:1rem; }}
380
  .brand {{ display:flex; align-items:center; gap:0.75rem; }}
381
  .logo {{ width:44px; height:44px; border-radius:10px; background:linear-gradient(135deg,var(--primary),var(--accent)); }}
382
  h1 {{ font-size:1.6rem; margin:0; }}
383
+ h2 {{ font-size:1.25rem; margin:1.8rem 0 0.6rem; color:#cbd5e1; }}
384
  .sub {{ color: var(--muted); }}
385
+ .grid {{ display:grid; grid-template-columns: repeat(auto-fit,minmax(240px,1fr)); gap:1rem; max-width:1200px; margin:0 auto; }}
386
  .grid-3 {{ grid-template-columns: repeat(auto-fit,minmax(280px,1fr)); }}
387
  .card {{ background: var(--card); border: 1px solid #1f2a44; padding: 1.25rem; border-radius: 14px; }}
388
  .card h3 {{ margin:0 0 0.5rem; font-size:1rem; color:#f1f5f9; }}
389
  .pill {{ display:inline-block; padding:2px 8px; margin:2px; border-radius:999px; background:#1e293b; border:1px solid #334155; color:#cbd5e1; font-size:0.78rem; }}
390
  .pill.cta {{ background:linear-gradient(135deg,var(--primary),var(--accent)); color:#0b1225; border-color:transparent; font-weight:600; }}
391
+ .container {{ max-width: 1200px; margin: 0 auto; }}
392
  code {{ background:#0b1225; border:1px solid #1f2a44; padding:2px 6px; border-radius:6px; color:#67e8f9; font-family:'JetBrains Mono', monospace; }}
393
  pre {{ background:#0b1225; border:1px solid #1f2a44; padding: 1rem; border-radius: 10px; color:#cbd5e1; overflow-x:auto; font-size:0.85rem; }}
394
  a {{ color: var(--accent); text-decoration: none; }}
 
397
  .kpi .num {{ font-size:1.6rem; font-weight:700; color:#f8fafc; }}
398
  .kpi .lbl {{ color: var(--muted); font-size:0.8rem; }}
399
  .kpi .num.good {{ color: var(--good); }}
400
+ footer {{ max-width:1200px; margin:2rem auto 0; color:var(--muted); font-size:0.85rem; }}
401
+ /* Training-evidence plots: one plot per row, full content width,
402
+ so dense charts (reward curves, stacked bars) stay readable. */
403
+ .plots {{ display:flex; flex-direction:column; gap:1.5rem; max-width:1200px; margin:0 auto; }}
404
+ .plots figure {{ background: var(--card); border:1px solid #1f2a44; border-radius: 14px; padding: 1.25rem; margin:0; }}
405
+ .plots figure a {{ display:block; }}
406
+ .plots img {{
407
+ width:100%; height:auto; display:block;
408
+ max-width:1100px; margin:0 auto;
409
+ border-radius:10px; background:#0b1225;
410
+ transition: transform 0.2s ease;
411
+ }}
412
+ .plots img:hover {{ transform: scale(1.01); }}
413
+ .plots figcaption {{ color: var(--muted); font-size:0.9rem; margin-top:0.75rem; line-height:1.55; text-align:center; max-width:1000px; margin-left:auto; margin-right:auto; }}
414
  .table-wrap {{ overflow-x:auto; }}
415
  table {{ width:100%; border-collapse: collapse; margin-top:0.5rem; font-size:0.9rem; }}
416
  th, td {{ padding:0.5rem 0.75rem; text-align:left; border-bottom:1px solid #1f2a44; }}