Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>OpenEnv Debug Panel — Multi-Agent Ecosystem</title> | |
| <!-- Warm up both Google Fonts origins: the stylesheet host and the font-file host. Font files are fetched in CORS mode, so that hint needs crossorigin to be reusable. --> | |
| <link rel="preconnect" href="https://fonts.googleapis.com"> | |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;600&display=swap" rel="stylesheet"> | |
| <style> | |
| *{box-sizing:border-box;margin:0;padding:0} | |
| :root{ | |
| --bg:#0d1017;--surface:#151822;--surface2:#1c2030;--border:#262d40; | |
| --blue:#4f8ef7;--green:#22c55e;--amber:#f59e0b;--red:#ef4444;--purple:#a855f7;--cyan:#22d3ee; | |
| --text:#e2e8f0;--muted:#6b7a94;--mono:'JetBrains Mono','Fira Code',monospace; | |
| } | |
| body{background:var(--bg);color:var(--text);font-family:'Inter','Segoe UI',sans-serif;font-size:14px;height:100vh;overflow:hidden} | |
| /* ββ Header ββ */ | |
| .header{background:linear-gradient(135deg,#131828 0%,#1a2040 100%);border-bottom:1px solid var(--border);padding:12px 20px;display:flex;align-items:center;gap:14px;flex-shrink:0} | |
| .header-logo{display:flex;align-items:center;gap:10px} | |
| .logo-dot{width:10px;height:10px;border-radius:50%;animation:pulse 2s infinite} | |
| .logo-dot.green{background:var(--green);box-shadow:0 0 8px var(--green)} | |
| .logo-dot.err{background:var(--red);box-shadow:0 0 8px var(--red)} | |
| @keyframes pulse{0%,100%{opacity:1}50%{opacity:.5}} | |
| .header h1{font-size:16px;font-weight:700;color:#fff;white-space:nowrap} | |
| .badge{padding:3px 10px;border-radius:99px;font-size:10px;font-weight:600;background:#1e3a5f;color:var(--blue);border:1px solid #2563eb33} | |
| /* ββ Full Layout ββ */ | |
| .layout{display:grid;grid-template-columns:280px 1fr;height:calc(100vh - 50px)} | |
| .sidebar{background:var(--surface);border-right:1px solid var(--border);overflow-y:auto;padding:12px;display:flex;flex-direction:column;gap:10px} | |
| .main{display:flex;flex-direction:column;overflow:hidden;min-height:0} | |
| /* ββ Cards ββ */ | |
| .card{background:var(--surface2);border:1px solid var(--border);border-radius:8px;overflow:hidden} | |
| .card-hdr{padding:8px 12px;border-bottom:1px solid var(--border);font-size:11px;font-weight:600;color:var(--muted);text-transform:uppercase;letter-spacing:.04em;display:flex;align-items:center;gap:6px;background:var(--surface)} | |
| .card-body{padding:10px} | |
| /* ββ Domain tabs ββ */ | |
| .domain-tabs{display:flex;gap:3px;background:var(--bg);border-radius:6px;padding:3px} | |
| .domain-tab{flex:1;padding:6px 0;border:none;border-radius:5px;cursor:pointer;font-size:11px;font-weight:600;color:var(--muted);background:transparent;transition:all .2s} | |
| .domain-tab.active{color:#fff} | |
| .domain-tab[data-domain="security"].active{background:#1e1a2e;color:var(--purple);box-shadow:0 0 0 1px #a855f744} | |
| .domain-tab[data-domain="pytorch"].active{background:#1a2a1a;color:var(--green);box-shadow:0 0 0 1px #22c55e44} | |
| .domain-tab[data-domain="clinical"].active{background:#1a2030;color:var(--cyan);box-shadow:0 0 0 1px #22d3ee44} | |
| /* ββ Task list ββ */ | |
| .task-list{display:flex;flex-direction:column;gap:3px} | |
| .task-btn{padding:7px 10px;border:1px solid var(--border);border-radius:6px;background:transparent;color:var(--text);cursor:pointer;text-align:left;display:flex;align-items:center;gap:8px;transition:all .15s;font-size:12px} | |
| .task-btn:hover{border-color:var(--blue);background:#1e254033} | |
| .task-btn.active{border-color:var(--blue);background:#1e2540;color:#fff} | |
| .task-btn .diff{font-size:9px;font-weight:700;padding:2px 7px;border-radius:99px;margin-left:auto} | |
| .diff-easy{background:#14532d33;color:var(--green);border:1px solid #22c55e44} | |
| .diff-medium{background:#78350f33;color:var(--amber);border:1px solid #f59e0b44} | |
| .diff-hard{background:#7f1d1d33;color:var(--red);border:1px solid #ef444444} | |
| /* ββ Form elements ββ */ | |
| label{display:block;font-size:10px;color:var(--muted);font-weight:600;text-transform:uppercase;letter-spacing:.04em;margin-bottom:4px} | |
| input,select,textarea{width:100%;background:var(--bg);border:1px solid var(--border);border-radius:5px;padding:7px 9px;color:var(--text);font-size:12px;font-family:inherit;outline:none;transition:border .15s} | |
| input:focus,select:focus,textarea:focus{border-color:var(--blue)} | |
| textarea{resize:vertical;font-family:var(--mono);font-size:11px;min-height:60px} | |
| .field{margin-bottom:8px} | |
| /* ββ Buttons ββ */ | |
| .btn{padding:7px 14px;border:none;border-radius:6px;cursor:pointer;font-size:12px;font-weight:600;transition:all .15s;display:inline-flex;align-items:center;gap:5px} | |
| .btn-primary{background:var(--blue);color:#fff} | |
| .btn-primary:hover{background:#3b7de8} | |
| .btn-success{background:#166534;color:var(--green);border:1px solid #22c55e44} | |
| .btn-success:hover{background:#14532d} | |
| .btn-danger{background:#7f1d1d;color:var(--red);border:1px solid #ef444444} | |
| .btn-ghost{background:transparent;color:var(--muted);border:1px solid var(--border);font-size:11px} | |
| .btn-ghost:hover{color:var(--text);border-color:var(--text)} | |
| .btn:disabled{opacity:.4;cursor:not-allowed} | |
| /* ββ Top bar ββ */ | |
| .main-topbar{padding:8px 16px;border-bottom:1px solid var(--border);display:flex;align-items:center;gap:10px;flex-wrap:wrap;background:var(--surface);flex-shrink:0} | |
| .info-chip{background:var(--bg);border:1px solid var(--border);border-radius:5px;padding:4px 8px;font-size:10px;white-space:nowrap} | |
| .info-chip span{color:var(--muted);margin-right:3px} | |
| .info-chip strong{color:var(--text)} | |
| /* ββ Main content: 3 rows ββ */ | |
| .content-area{display:flex;flex-direction:column;flex:1;overflow:hidden;min-height:0} | |
| /* Row 1: Observation + Reward (flexible) */ | |
| .obs-reward-area{display:grid;grid-template-columns:1fr 340px;flex:1;overflow:hidden;min-height:0;border-bottom:1px solid var(--border)} | |
| /* Row 2: Action builder (auto height, scrollable) */ | |
| .action-section{border-bottom:1px solid var(--border);background:var(--surface);padding:10px 16px;max-height:220px;overflow-y:auto;flex-shrink:0} | |
| .action-tabs{display:flex;gap:3px;flex-wrap:wrap} | |
| .action-tab{padding:4px 10px;border:1px solid var(--border);border-radius:5px;cursor:pointer;font-size:10px;font-weight:600;color:var(--muted);background:transparent} | |
| .action-tab.active{border-color:var(--blue);color:var(--blue);background:#1e2540} | |
| .action-fields{display:none;grid-template-columns:1fr 1fr;gap:8px} | |
| .action-fields.visible{display:grid} | |
| .action-fields .full{grid-column:1/-1} | |
| /* Row 3: Step log (fixed 160px) */ | |
| .step-log{background:var(--bg);border-top:1px solid var(--border);overflow-y:auto;padding:8px 12px;font-family:var(--mono);font-size:11px;line-height:1.7;height:160px;flex-shrink:0} | |
| .log-line{display:flex;gap:8px;align-items:baseline} | |
| .log-time{color:var(--muted);flex-shrink:0;min-width:52px} | |
| .log-tag{flex-shrink:0;font-weight:700;min-width:56px} | |
| .log-tag.start{color:var(--blue)} | |
| .log-tag.step{color:var(--amber)} | |
| .log-tag.end{color:var(--green)} | |
| .log-tag.error{color:var(--red)} | |
| .log-tag.info{color:var(--purple)} | |
| .log-msg{color:var(--text);word-break:break-all} | |
| /* ββ JSON viewer ββ */ | |
| .json-view{background:var(--bg);font-family:var(--mono);font-size:11px;line-height:1.5;overflow-y:auto;padding:12px;white-space:pre-wrap;word-break:break-all;flex:1} | |
| .json-key{color:#93c5fd} | |
| .json-str{color:#86efac} | |
| .json-num{color:#fbbf24} | |
| .json-bool{color:#f87171} | |
| .json-null{color:var(--muted)} | |
| /* ββ Reward ββ */ | |
| .reward-section{padding:12px;overflow-y:auto;background:var(--surface)} | |
| .reward-display{text-align:center;padding:10px 0} | |
| .reward-number{font-size:42px;font-weight:800;font-family:var(--mono);line-height:1} | |
| .reward-bar-wrap{margin:8px 0;height:8px;background:var(--border);border-radius:99px;overflow:hidden} | |
| .reward-bar{height:100%;border-radius:99px;transition:width .5s ease;background:linear-gradient(90deg,var(--green),#84cc16)} | |
| .reward-label{font-size:10px;color:var(--muted)} | |
| .breakdown-item{display:flex;justify-content:space-between;align-items:center;padding:4px 0;border-bottom:1px solid var(--border);font-size:11px} | |
| .breakdown-item:last-child{border:none} | |
| .breakdown-val.pos{color:var(--green)} | |
| .breakdown-val.neg{color:var(--red)} | |
| /* ββ Task meta ββ */ | |
| .task-meta{background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:8px 10px;font-size:11px;line-height:1.6;color:var(--muted)} | |
| .task-meta strong{color:var(--text);display:block;margin-bottom:3px;font-size:12px} | |
| /* ββ Inference runner ββ */ | |
| .inference-panel{background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:10px;margin-top:4px} | |
| .inference-progress{display:flex;gap:4px;flex-wrap:wrap;margin:6px 0} | |
| .task-chip{padding:2px 6px;border-radius:4px;font-size:9px;font-weight:700;border:1px solid var(--border);color:var(--muted)} | |
| .task-chip.running{border-color:var(--amber);color:var(--amber);animation:pulse 1s infinite} | |
| .task-chip.done{border-color:var(--green);color:var(--green)} | |
| .task-chip.fail{border-color:var(--red);color:var(--red)} | |
| /* ββ Status indicator ββ */ | |
| .status-dot{width:8px;height:8px;border-radius:50%;display:inline-block;flex-shrink:0} | |
| /* ββ Responsive ββ */ | |
| @media(max-width:900px){ | |
| .layout{grid-template-columns:1fr;grid-template-rows:auto 1fr} | |
| .sidebar{border-right:none;border-bottom:1px solid var(--border);max-height:260px;flex-direction:row;flex-wrap:wrap;overflow-x:auto} | |
| .obs-reward-area{grid-template-columns:1fr} | |
| } | |
| /* ββ Page Navigation ββ */ | |
| .page-tabs{display:flex;gap:2px;background:var(--bg);border-radius:6px;padding:2px;margin-left:16px} | |
| .page-tab{padding:5px 14px;border:none;border-radius:5px;cursor:pointer;font-size:11px;font-weight:600;color:var(--muted);background:transparent;transition:all .2s} | |
| .page-tab.active{color:#fff;background:var(--blue);box-shadow:0 0 12px #4f8ef733} | |
| .page-tab:hover:not(.active){color:var(--text);background:var(--surface2)} | |
| .page{display:none;height:calc(100vh - 50px);overflow:hidden} | |
| .page.visible{display:flex;flex-direction:column} | |
| /* ββ Benchmark Page ββ */ | |
| .bench-layout{display:grid;grid-template-columns:360px 1fr;height:100%;overflow:hidden} | |
| .bench-sidebar{background:var(--surface);border-right:1px solid var(--border);padding:16px;overflow-y:auto} | |
| .bench-main{display:flex;flex-direction:column;overflow:hidden} | |
| .bench-card{background:var(--surface2);border:1px solid var(--border);border-radius:10px;overflow:hidden;margin-bottom:12px} | |
| .bench-card-hdr{padding:10px 14px;border-bottom:1px solid var(--border);font-size:12px;font-weight:700;color:var(--text);display:flex;align-items:center;gap:8px;background:linear-gradient(135deg,var(--surface) 0%,var(--surface2) 100%)} | |
| .bench-card-body{padding:12px} | |
| .preset-row{display:flex;gap:4px;flex-wrap:wrap;margin-bottom:10px} | |
| .preset-btn{padding:4px 10px;border:1px solid var(--border);border-radius:5px;cursor:pointer;font-size:10px;font-weight:600;color:var(--muted);background:transparent;transition:all .15s} | |
| .preset-btn:hover{border-color:var(--blue);color:var(--blue)} | |
| .preset-btn.active{border-color:var(--blue);background:#1e2540;color:var(--blue)} | |
| .bench-field{margin-bottom:10px} | |
| .bench-field label{font-size:10px;color:var(--muted);font-weight:600;text-transform:uppercase;letter-spacing:.04em;margin-bottom:4px;display:block} | |
| .bench-field input,.bench-field select{width:100%;background:var(--bg);border:1px solid var(--border);border-radius:6px;padding:8px 10px;color:var(--text);font-size:12px;font-family:inherit;outline:none;transition:border .15s} | |
| /* Selects are listed explicitly: the .bench-field select base rule ties with the generic select:focus rule on specificity and wins on source order, and outline is removed, so without this a focused select shows no indicator. */ | |
| .bench-field input:focus,.bench-field select:focus{border-color:var(--blue)} | |
| .bench-field input[type=password]{font-family:var(--mono);letter-spacing:2px} | |
| .run-btn{width:100%;padding:10px;border:none;border-radius:8px;cursor:pointer;font-size:13px;font-weight:700;color:#fff;background:linear-gradient(135deg,#4f8ef7 0%,#a855f7 100%);transition:all .2s;display:flex;align-items:center;justify-content:center;gap:8px} | |
| .run-btn:hover{transform:translateY(-1px);box-shadow:0 4px 20px #4f8ef744} | |
| .run-btn:disabled{opacity:.5;cursor:not-allowed;transform:none;box-shadow:none} | |
| .run-btn.running{background:linear-gradient(135deg,#f59e0b 0%,#ef4444 100%);animation:pulse 1.5s infinite} | |
| /* ββ Results Table ββ */ | |
| .results-area{flex:1;overflow-y:auto;padding:16px;background:var(--bg)} | |
| .results-table{width:100%;border-collapse:collapse;font-size:12px} | |
| .results-table th{padding:8px 10px;text-align:left;font-size:10px;font-weight:700;color:var(--muted);text-transform:uppercase;letter-spacing:.04em;border-bottom:2px solid var(--border);position:sticky;top:0;background:var(--bg);z-index:1} | |
| .results-table td{padding:6px 10px;border-bottom:1px solid var(--border)} | |
| .results-table tr:hover{background:var(--surface2)} | |
| .score-cell{font-family:var(--mono);font-weight:700;font-size:12px} | |
| .score-high{color:var(--green)} | |
| .score-mid{color:var(--amber)} | |
| .score-low{color:var(--red)} | |
| .avg-cell{font-size:14px;font-weight:800} | |
| /* ββ Bar Chart ββ */ | |
| .chart-container{padding:16px;border-top:1px solid var(--border);background:var(--surface);flex-shrink:0;max-height:280px;overflow-y:auto} | |
| .chart-bar-row{display:flex;align-items:center;gap:8px;margin-bottom:6px} | |
| .chart-label{width:120px;font-size:11px;font-weight:600;color:var(--text);text-align:right;flex-shrink:0;white-space:nowrap;overflow:hidden;text-overflow:ellipsis} | |
| .chart-bar-bg{flex:1;height:22px;background:var(--bg);border-radius:4px;overflow:hidden;border:1px solid var(--border)} | |
| .chart-bar-fill{height:100%;border-radius:3px;transition:width .8s ease;display:flex;align-items:center;padding:0 6px;font-size:10px;font-weight:700;color:#fff;white-space:nowrap;min-width:0} | |
| /* ββ Benchmark Log ββ */ | |
| .bench-log{background:var(--bg);border-top:1px solid var(--border);height:200px;overflow-y:auto;padding:8px 12px;font-family:var(--mono);font-size:11px;line-height:1.6;flex-shrink:0} | |
| .bench-log .log-warn{color:var(--amber)} | |
| .bench-log .log-err{color:var(--red)} | |
| .bench-log .log-ok{color:var(--green)} | |
| .bench-log .log-info{color:var(--blue)} | |
| /* ββ Empty State ββ */ | |
| .empty-state{display:flex;flex-direction:column;align-items:center;justify-content:center;height:100%;color:var(--muted);gap:12px} | |
| .empty-state .icon{font-size:48px;opacity:.3} | |
| .empty-state p{font-size:13px;text-align:center;max-width:260px;line-height:1.5} | |
| </style> | |
| </head> | |
| <body> | |
| <!-- ββ HEADER ββ --> | |
| <div class="header"> | |
| <div class="header-logo"> | |
| <div class="logo-dot green" id="status-dot"></div> | |
| <h1>OpenEnv Debug Panel</h1> | |
| <span class="badge">Multi-Agent Ecosystem</span> | |
| </div> | |
| <div style="display:flex;gap:8px;margin-left:auto;align-items:center"> | |
| <div class="page-tabs"> | |
| <button class="page-tab active" onclick="switchPage('debug')" id="ptab-debug">π§ Debug</button> | |
| <button class="page-tab" onclick="switchPage('benchmark')" id="ptab-benchmark">π Benchmark</button> | |
| </div> | |
| <span class="badge" style="background:#1a2a1a;color:var(--green);border-color:#22c55e33">Security · PyTorch · Clinical</span> | |
| <span id="server-status" style="font-size:10px;color:var(--muted)">Checking...</span> | |
| </div> | |
| </div> | |
| <!-- ββ PAGE: DEBUG ββ --> | |
| <div class="page visible" id="page-debug"> | |
| <!-- ββ LAYOUT ββ --> | |
| <div class="layout"> | |
| <!-- SIDEBAR --> | |
| <div class="sidebar"> | |
| <!-- Domain Selector --> | |
| <div class="card"> | |
| <div class="card-hdr">π― Domain</div> | |
| <div class="card-body" style="padding:6px"> | |
| <div class="domain-tabs"> | |
| <button class="domain-tab active" data-domain="security" onclick="switchDomain('security')">Security</button> | |
| <button class="domain-tab" data-domain="pytorch" onclick="switchDomain('pytorch')">PyTorch</button> | |
| <button class="domain-tab" data-domain="clinical" onclick="switchDomain('clinical')">Clinical</button> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Task Selector --> | |
| <div class="card"> | |
| <div class="card-hdr">π Tasks</div> | |
| <div class="card-body" style="padding:6px"> | |
| <div class="task-list" id="task-list"></div> | |
| </div> | |
| </div> | |
| <!-- Task Info --> | |
| <div class="card"> | |
| <div class="card-hdr">βΉοΈ Task Info</div> | |
| <div class="card-body"> | |
| <div class="task-meta" id="task-meta">Select a task to see details.</div> | |
| </div> | |
| </div> | |
| <!-- Run Full Inference --> | |
| <div class="inference-panel"> | |
| <div style="font-size:11px;font-weight:700;color:var(--text);margin-bottom:6px">β‘ Full Inference Run</div> | |
| <div style="font-size:10px;color:var(--muted);margin-bottom:8px">Runs all 9 tasks via /inference endpoint.</div> | |
| <button class="btn btn-success" style="width:100%;font-size:11px" onclick="runFullInference()" id="inf-btn">βΆ Run All 9 Tasks</button> | |
| <div class="inference-progress" id="inf-progress" style="display:none"></div> | |
| <div id="inf-scores" style="margin-top:6px;font-family:var(--mono);font-size:10px"></div> | |
| </div> | |
| </div> | |
| <!-- MAIN PANEL --> | |
| <div class="main"> | |
| <!-- Top bar --> | |
| <div class="main-topbar"> | |
| <div style="display:flex;gap:8px;flex:1;flex-wrap:wrap"> | |
| <div class="info-chip"><span>Task:</span><strong id="chip-task">β</strong></div> | |
| <div class="info-chip"><span>Episode:</span><strong id="chip-episode" style="font-family:var(--mono);font-size:9px">β</strong></div> | |
| <div class="info-chip"><span>Step:</span><strong id="chip-step">0</strong></div> | |
| <div class="info-chip"><span>Reward:</span><strong id="chip-reward" style="color:var(--green)">0.0000</strong></div> | |
| <div class="info-chip"><span>Done:</span><strong id="chip-done">β</strong></div> | |
| </div> | |
| <div style="display:flex;gap:6px"> | |
| <button class="btn btn-primary" onclick="doReset()" id="btn-reset">β³ Reset</button> | |
| <button class="btn btn-success" onclick="doStep()" id="btn-step" disabled>βΆ Step</button> | |
| <button class="btn btn-ghost" onclick="clearLog()">π Clear</button> | |
| </div> | |
| </div> | |
| <!-- Content area: 3 flex rows --> | |
| <div class="content-area"> | |
| <!-- ROW 1: Observation + Reward --> | |
| <div class="obs-reward-area"> | |
| <!-- Observation --> | |
| <div style="display:flex;flex-direction:column;overflow:hidden;border-right:1px solid var(--border)"> | |
| <div class="card-hdr">π₯ Observation</div> | |
| <div class="json-view" id="obs-view"> | |
| <span style="color:var(--muted)">Press Reset to load the first observation...</span> | |
| </div> | |
| </div> | |
| <!-- Reward --> | |
| <div style="display:flex;flex-direction:column;overflow:hidden"> | |
| <div class="card-hdr">π Reward</div> | |
| <div class="reward-section"> | |
| <div class="reward-display"> | |
| <div class="reward-number" id="reward-num" style="color:var(--muted)">β</div> | |
| <div class="reward-bar-wrap"><div class="reward-bar" id="reward-bar" style="width:0%"></div></div> | |
| <div class="reward-label" id="reward-label">No reward yet</div> | |
| </div> | |
| <div id="reward-breakdown"></div> | |
| <div id="step-result-raw" style="margin-top:6px"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ROW 2: Action builder --> | |
| <div class="action-section"> | |
| <div style="display:flex;align-items:center;gap:8px;margin-bottom:8px"> | |
| <div style="font-size:11px;font-weight:700;color:var(--text)">β‘ Build Action</div> | |
| <div class="action-tabs" id="action-tabs"></div> | |
| <button class="btn btn-ghost" style="margin-left:auto" onclick="toggleRawJson()">{ } Raw JSON</button> | |
| </div> | |
| <div id="action-fields-container"></div> | |
| <div id="raw-json-area" style="display:none"> | |
| <div class="field"> | |
| <!-- for/id pairing gives the textarea an accessible name and makes the label a click target --> | |
| <label for="raw-action">Raw JSON Action</label> | |
| <textarea id="raw-action" rows="3" placeholder='{"action_type":"identify_vulnerability","vuln_type":"sql_injection","cvss_score":7.5,"severity":"high"}'></textarea> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ROW 3: Step log (outside content-area, fixed height) --> | |
| <div class="step-log" id="step-log"> | |
| <div class="log-line"><span class="log-tag info">INFO</span><span class="log-msg">Debug panel ready. Select a task and press Reset to start.</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| <script> | |
| // ─────────────────────────────────────────────── | |
| // DATA | |
| // ─────────────────────────────────────────────── | |
| // Task catalogue, keyed by domain name (the same names used by the domain tabs' data-domain attribute). | |
| // Each task entry carries: | |
| //   id      - identifier shown in the "Task" chip and in the task info card | |
| //   label   - caption of the task button | |
| //   diff    - 'easy' | 'medium' | 'hard'; selects the diff-* badge class and is shown upper-cased | |
| //   desc    - free-text description shown in the task info card | |
| //   actions - action types offered as tabs for this task (keys of ACTION_SCHEMAS); the first one is pre-selected | |
| const TASKS = { | |
| security: [ | |
| { id:'sec_easy', label:'Injection Detection', diff:'easy', desc:'Identify whether a tool-call has a vulnerability. Return vuln_type, cvss_score, severity.', actions:['identify_vulnerability'] }, | |
| { id:'sec_medium', label:'Multi-Vuln Scan', diff:'medium', desc:'Scan a code module for multiple vulnerabilities, then propose fixes.', actions:['identify_vulnerability','propose_fix'] }, | |
| { id:'sec_hard', label:'Auto-Sanitize + Review', diff:'hard', desc:'Identify, fix, and revise code based on reviewer feedback. Multi-turn.', actions:['identify_vulnerability','propose_fix','revise_fix'] }, | |
| ], | |
| pytorch: [ | |
| { id:'dep_easy', label:'Deprecation Mapper', diff:'easy', desc:'Detect deprecated PyTorch 1.x APIs and flag with replacements.', actions:['flag_outdated'] }, | |
| { id:'dep_medium', label:'Dependency Resolver', diff:'medium', desc:'Resolve version conflicts using a compatibility matrix.', actions:['resolve_conflict'] }, | |
| { id:'dep_hard', label:'Graph-Break Hunter', diff:'hard', desc:'Find and fix torch.compile breaking patterns.', actions:['migrate_api'] }, | |
| ], | |
| clinical: [ | |
| { id:'cli_easy', label:'Gap Detection', diff:'easy', desc:'Identify missing mandatory steps before a procedure.', actions:['detect_gap'] }, | |
| { id:'cli_medium', label:'Priority Recovery', diff:'medium', desc:'Detect gaps then rank clinical issues by urgency.', actions:['detect_gap','rank_issues'] }, | |
| { id:'cli_hard', label:'Full Re-plan', diff:'hard', desc:'Detect, rank, and reorder recovery steps respecting dependencies.', actions:['detect_gap','rank_issues','order_steps'] }, | |
| ] | |
| }; | |
| // Form schema per action type; drives the action tabs and the generated input form. | |
| //   label  - caption of the action tab | |
| //   fields - controls rendered for the action, in order. Per field: | |
| //     key         - field name; the generated control gets the id 'af-' + key | |
| //     label       - text of the control's label | |
| //     type        - 'select' and 'textarea' get dedicated elements; anything else becomes <input type=...> | |
| //     options     - choices for a 'select' | |
| //     placeholder - hint text for textarea / input controls | |
| //     min/max/step- copied onto <input> controls when present | |
| //     full        - when true the field spans both columns of the form grid | |
| // NOTE(review): keys ending in _json presumably hold JSON text that the action builder parses | |
| // before sending; that code is outside this section - confirm there. | |
| const ACTION_SCHEMAS = { | |
| identify_vulnerability: { | |
| label: 'Identify Vuln', | |
| fields: [ | |
| { key:'vuln_type', label:'Vulnerability Type', type:'select', options:['sql_injection','xss','idor','hardcoded_secret','missing_auth','jwt_misuse','path_traversal','ssrf','rate_limit_missing','xxe'] }, | |
| { key:'cvss_score', label:'CVSS Score (0β10)', type:'number', placeholder:'7.5', min:0, max:10, step:0.1 }, | |
| { key:'severity', label:'Severity', type:'select', options:['critical','high','medium','low','info'] }, | |
| { key:'affected_line', label:'Affected Line', type:'number', placeholder:'3' }, | |
| ] | |
| }, | |
| propose_fix: { | |
| label: 'Propose Fix', | |
| fields: [ | |
| { key:'fix_code', label:'Fixed Code', type:'textarea', placeholder:'db.execute(sql, (param,))', full:true }, | |
| { key:'explanation', label:'Explanation', type:'textarea', placeholder:'Use parameterized queries', full:true }, | |
| ] | |
| }, | |
| revise_fix: { | |
| label: 'Revise Fix', | |
| fields: [ | |
| { key:'fix_code', label:'Revised Code', type:'textarea', placeholder:'Complete corrected code', full:true }, | |
| { key:'addressed_feedback', label:'Addressed Feedback', type:'textarea', placeholder:'Paste reviewer_feedback here', full:true }, | |
| ] | |
| }, | |
| flag_outdated: { | |
| label: 'Flag Outdated', | |
| fields: [ | |
| { key:'packages_json', label:'Outdated Packages (JSON)', type:'textarea', placeholder:'{"torch": "1.9.0", "numpy": "1.21.0"}', full:true }, | |
| { key:'deprecated_api', label:'Deprecated API', type:'text', placeholder:'torch.autograd.Variable' }, | |
| { key:'replacement', label:'Replacement', type:'text', placeholder:'plain tensor' }, | |
| ] | |
| }, | |
| resolve_conflict: { | |
| label: 'Resolve Conflict', | |
| fields: [ | |
| { key:'packages_json', label:'Resolved Packages (JSON)', type:'textarea', placeholder:'{"torch":"2.1.0","numpy":"1.24.3"}', full:true }, | |
| { key:'reasoning', label:'Reasoning', type:'textarea', placeholder:'torch 2.1 requires numpy>=1.24', full:true }, | |
| ] | |
| }, | |
| migrate_api: { | |
| label: 'Migrate API', | |
| fields: [ | |
| { key:'completed_items_json', label:'Completed Break IDs (JSON)', type:'textarea', placeholder:'["break_001"]', full:true }, | |
| { key:'code_changes_json', label:'Code Changes (JSON)', type:'textarea', placeholder:'{"break_001":"use torch.where"}', full:true }, | |
| ] | |
| }, | |
| detect_gap: { | |
| label: 'Detect Gap', | |
| fields: [ | |
| { key:'missing_steps_json', label:'Missing Steps (JSON array)', type:'textarea', placeholder:'["pre_op_consent","blood_test"]', full:true }, | |
| { key:'risk_level', label:'Risk Level', type:'select', options:['critical','high','medium','low'] }, | |
| ] | |
| }, | |
| rank_issues: { | |
| label: 'Rank Issues', | |
| fields: [ | |
| { key:'priority_order_json', label:'Priority Order (highest first)', type:'textarea', placeholder:'["blood_test","pre_op_consent"]', full:true }, | |
| ] | |
| }, | |
| order_steps: { | |
| label: 'Order Steps', | |
| fields: [ | |
| { key:'recovery_steps_json', label:'Recovery Steps (ordered)', type:'textarea', placeholder:'["specialist","alt_treatment","post_op"]', full:true }, | |
| ] | |
| } | |
| }; | |
| // ─────────────────────────────────────────────── | |
| // STATE | |
| // ─────────────────────────────────────────────── | |
| // Single mutable UI state object shared by the debug-page functions. | |
| let state = { | |
| // Key into TASKS for the active domain tab. | |
| domain: 'security', | |
| // Currently selected task entry (an element of TASKS[domain]). | |
| task: TASKS.security[0], | |
| // Cleared to null whenever a task is selected. | |
| // NOTE(review): presumably filled in by the reset call, which is outside this section. | |
| episodeId: null, | |
| // Per-episode counters/flags; zeroed / cleared by selectTask. | |
| step: 0, | |
| totalReward: 0, | |
| done: false, | |
| // Key into ACTION_SCHEMAS for the active action tab. | |
| currentAction: 'identify_vulnerability', | |
| // When true, buildAction parses the raw JSON textarea instead of reading the generated form. | |
| rawMode: false | |
| }; | |
| // ─────────────────────────────────────────────── | |
| // INIT | |
| // ─────────────────────────────────────────────── | |
| function init() { | |
| renderTaskList(); | |
| selectTask(state.task); | |
| checkServerHealth(); | |
| setInterval(checkServerHealth, 15000); | |
| } | |
| // ─────────────────────────────────────────────── | |
| // DOMAIN / TASK | |
| // ─────────────────────────────────────────────── | |
| function switchDomain(domain) { | |
| state.domain = domain; | |
| state.task = TASKS[domain][0]; | |
| document.querySelectorAll('.domain-tab').forEach(t => t.classList.toggle('active', t.dataset.domain === domain)); | |
| renderTaskList(); | |
| selectTask(state.task); | |
| } | |
| function renderTaskList() { | |
| const list = document.getElementById('task-list'); | |
| list.innerHTML = ''; | |
| TASKS[state.domain].forEach(task => { | |
| const btn = document.createElement('button'); | |
| btn.className = 'task-btn' + (task.id === state.task.id ? ' active' : ''); | |
| btn.innerHTML = `<span>${task.label}</span><span class="diff diff-${task.diff}">${task.diff.toUpperCase()}</span>`; | |
| btn.onclick = () => selectTask(task); | |
| list.appendChild(btn); | |
| }); | |
| } | |
| function selectTask(task) { | |
| state.task = task; | |
| state.episodeId = null; | |
| state.step = 0; | |
| state.totalReward = 0; | |
| state.done = false; | |
| document.querySelectorAll('.task-btn').forEach(b => b.classList.toggle('active', b.querySelector('span').textContent === task.label)); | |
| document.getElementById('task-meta').innerHTML = `<strong>${task.label} (${task.id})</strong>${task.desc}<br><br><span style="color:var(--blue)">Actions:</span> ${task.actions.join(' β ')}`; | |
| document.getElementById('chip-task').textContent = task.id; | |
| document.getElementById('chip-episode').textContent = 'β'; | |
| document.getElementById('chip-step').textContent = '0'; | |
| document.getElementById('chip-reward').textContent = '0.0000'; | |
| document.getElementById('chip-done').textContent = 'β'; | |
| document.getElementById('obs-view').innerHTML = '<span style="color:var(--muted)">Press Reset to start this task...</span>'; | |
| document.getElementById('reward-num').textContent = 'β'; | |
| document.getElementById('reward-num').style.color = 'var(--muted)'; | |
| document.getElementById('reward-bar').style.width = '0%'; | |
| document.getElementById('reward-label').textContent = 'No reward yet'; | |
| document.getElementById('reward-breakdown').innerHTML = ''; | |
| document.getElementById('step-result-raw').innerHTML = ''; | |
| document.getElementById('btn-step').disabled = true; | |
| document.getElementById('btn-step').textContent = 'βΆ Step'; | |
| state.currentAction = task.actions[0]; | |
| renderActionTabs(); | |
| renderActionFields(); | |
| log('info', `Selected: ${task.id} | ${task.label}`); | |
| } | |
| // ─────────────────────────────────────────────── | |
| // ACTION BUILDER | |
| // ─────────────────────────────────────────────── | |
| // Pre-built examples for each action type (shown when fields are empty) | |
| // Keyed by the same action types as ACTION_SCHEMAS. Each example is a complete action | |
| // payload including action_type, and uses plain keys (packages, completed_items, | |
| // missing_steps, ...) rather than the *_json form-field keys. | |
| // NOTE(review): the code that reads this table is outside this section - confirm that | |
| // the "shown when fields are empty" behaviour still holds. | |
| const ACTION_EXAMPLES = { | |
| identify_vulnerability: { | |
| action_type: 'identify_vulnerability', | |
| vuln_type: 'sql_injection', | |
| cvss_score: 8.5, | |
| severity: 'critical', | |
| }, | |
| propose_fix: { | |
| action_type: 'propose_fix', | |
| fix_code: 'db.execute("SELECT * FROM users WHERE name = ?", (user_input,))', | |
| explanation: 'Use parameterized query to prevent SQL injection', | |
| }, | |
| revise_fix: { | |
| action_type: 'revise_fix', | |
| fix_code: 'db.execute("SELECT * FROM users WHERE name = ?", (sanitize(user_input),))', | |
| addressed_feedback: 'Added input validation on top of parameterized query', | |
| }, | |
| flag_outdated: { | |
| action_type: 'flag_outdated', | |
| packages: { torch: '1.9.0' }, | |
| deprecated_api: 'torch.autograd.Variable', | |
| replacement: 'plain tensor (remove Variable wrapper)', | |
| }, | |
| resolve_conflict: { | |
| action_type: 'resolve_conflict', | |
| packages: { torch: '2.1.0', numpy: '1.24.0' }, | |
| reasoning: 'torch 2.1 requires numpy>=1.24 per compatibility matrix', | |
| }, | |
| migrate_api: { | |
| action_type: 'migrate_api', | |
| completed_items: ['break_001', 'break_002', 'break_003'], | |
| code_changes: { | |
| break_001: 'use torch.where instead of if x.item()', | |
| break_002: 'use tensor.shape[0] instead of len(x)', | |
| break_003: 'use x.detach().numpy() outside compiled fn', | |
| }, | |
| }, | |
| detect_gap: { | |
| action_type: 'detect_gap', | |
| missing_steps: ['pre_op_consent', 'blood_work'], | |
| risk_level: 'critical', | |
| }, | |
| rank_issues: { | |
| action_type: 'rank_issues', | |
| priority_order: ['resolve_insurance', 'pre_op_consent', 'book_specialist'], | |
| }, | |
| order_steps: { | |
| action_type: 'order_steps', | |
| recovery_steps: ['resolve_insurance', 'book_specialist', 'complete_pre_op', 'schedule_surgery'], | |
| }, | |
| }; | |
| function renderActionTabs() { | |
| const tabs = document.getElementById('action-tabs'); | |
| tabs.innerHTML = ''; | |
| state.task.actions.forEach(a => { | |
| const t = document.createElement('button'); | |
| t.className = 'action-tab' + (a === state.currentAction ? ' active' : ''); | |
| t.textContent = ACTION_SCHEMAS[a]?.label || a; | |
| t.onclick = () => { state.currentAction = a; renderActionTabs(); renderActionFields(); syncRawJson(); }; | |
| tabs.appendChild(t); | |
| }); | |
| } | |
| function renderActionFields() { | |
| const container = document.getElementById('action-fields-container'); | |
| const schema = ACTION_SCHEMAS[state.currentAction]; | |
| if (!schema) { container.innerHTML = '<div style="color:var(--muted);font-size:11px">No schema.</div>'; return; } | |
| container.innerHTML = ''; | |
| const grid = document.createElement('div'); | |
| grid.className = 'action-fields visible'; | |
| schema.fields.forEach(f => { | |
| const wrap = document.createElement('div'); | |
| wrap.className = 'field' + (f.full ? ' full' : ''); | |
| const lbl = document.createElement('label'); | |
| lbl.textContent = f.label; | |
| wrap.appendChild(lbl); | |
| let el; | |
| if (f.type === 'select') { | |
| el = document.createElement('select'); | |
| el.id = 'af-' + f.key; | |
| f.options.forEach(o => { const op = document.createElement('option'); op.value = op.textContent = o; el.appendChild(op); }); | |
| el.addEventListener('change', syncRawJson); | |
| } else if (f.type === 'textarea') { | |
| el = document.createElement('textarea'); | |
| el.id = 'af-' + f.key; | |
| el.placeholder = f.placeholder || ''; | |
| el.rows = 2; | |
| el.addEventListener('input', syncRawJson); | |
| } else { | |
| el = document.createElement('input'); | |
| el.type = f.type || 'text'; | |
| el.id = 'af-' + f.key; | |
| el.placeholder = f.placeholder || ''; | |
| if (f.min !== undefined) el.min = f.min; | |
| if (f.max !== undefined) el.max = f.max; | |
| if (f.step !== undefined) el.step = f.step; | |
| el.addEventListener('input', syncRawJson); | |
| } | |
| wrap.appendChild(el); | |
| grid.appendChild(wrap); | |
| }); | |
| container.appendChild(grid); | |
| // Set initial raw JSON | |
| syncRawJson(); | |
| } | |
| function buildAction() { | |
| if (state.rawMode) { | |
| try { return JSON.parse(document.getElementById('raw-action').value); } | |
| catch(e) { log('error', 'Invalid JSON: ' + e.message); return null; } | |
| } | |
| return _buildActionFromFields(); | |
| } | |
| function _buildActionFromFields() { | |
| const schema = ACTION_SCHEMAS[state.currentAction]; | |
| const action = { action_type: state.currentAction }; | |
| schema.fields.forEach(f => { | |
| const el = document.getElementById('af-' + f.key); | |
| if (!el) return; | |
| let val = el.value.trim(); | |
| if (!val) return; | |
| if (f.key.endsWith('_json')) { | |
| try { action[f.key.replace('_json','')] = JSON.parse(val); } | |
| catch(e) { action[f.key.replace('_json','')] = val; } | |
| } else if (f.type === 'number') { | |
| action[f.key] = parseFloat(val); | |
| } else { | |
| action[f.key] = val; | |
| } | |
| }); | |
| return action; | |
| } | |
| function syncRawJson() { | |
| const action = _buildActionFromFields(); | |
| // If form is mostly empty, show the example instead | |
| const fieldCount = Object.keys(action).length; | |
| const display = fieldCount <= 1 ? ACTION_EXAMPLES[state.currentAction] || action : action; | |
| document.getElementById('raw-action').value = JSON.stringify(display, null, 2); | |
| } | |
| function toggleRawJson() { | |
| state.rawMode = !state.rawMode; | |
| document.getElementById('raw-json-area').style.display = state.rawMode ? 'block' : 'none'; | |
| document.getElementById('action-fields-container').style.display = state.rawMode ? 'none' : 'block'; | |
| if (state.rawMode) syncRawJson(); | |
| } | |
| // ═══════════════════════════════════════════════ | |
| // API CALLS | |
| // ═══════════════════════════════════════════════ | |
| async function doReset() { | |
| const btn = document.getElementById('btn-reset'); | |
| btn.disabled = true; btn.textContent = 'β³ Resetting...'; | |
| try { | |
| log('start', `[START] task_id=${state.task.id}`); | |
| const res = await fetch('/reset', { | |
| method:'POST', headers:{'Content-Type':'application/json'}, | |
| body: JSON.stringify({ task_id: state.task.id }) | |
| }); | |
| const data = await res.json(); | |
| if (data.error) throw new Error(data.error); | |
| state.episodeId = data.episode_id; | |
| state.step = 0; state.totalReward = 0; state.done = false; | |
| document.getElementById('chip-episode').textContent = (state.episodeId||'').slice(0,8)+'β¦'; | |
| document.getElementById('chip-step').textContent = '0'; | |
| document.getElementById('chip-reward').textContent = '0.0000'; | |
| document.getElementById('chip-done').textContent = 'false'; | |
| renderObs(data.observation || data); | |
| document.getElementById('btn-step').disabled = false; | |
| document.getElementById('btn-step').textContent = 'βΆ Step'; | |
| log('info', `Episode: ${state.episodeId}`); | |
| } catch(e) { | |
| log('error', 'Reset failed: ' + e.message); | |
| } finally { | |
| btn.disabled = false; btn.textContent = 'β³ Reset'; | |
| } | |
| } | |
| async function doStep() { | |
| if (!state.episodeId) { log('error', 'No episode. Press Reset first.'); return; } | |
| if (state.done) { log('info', 'Done. Press Reset for new episode.'); return; } | |
| const action = buildAction(); | |
| if (!action) return; | |
| action.episode_id = state.episodeId; | |
| const btn = document.getElementById('btn-step'); | |
| btn.disabled = true; btn.textContent = 'βΆ Stepping...'; | |
| try { | |
| const res = await fetch('/step', { | |
| method:'POST', headers:{'Content-Type':'application/json'}, | |
| body: JSON.stringify(action) | |
| }); | |
| const data = await res.json(); | |
| const reward = typeof data.reward === 'number' ? data.reward : 0; | |
| const done = data.done === true || data.done === 'True'; | |
| state.step++; state.totalReward += reward; state.done = done; | |
| document.getElementById('chip-step').textContent = state.step; | |
| document.getElementById('chip-reward').textContent = state.totalReward.toFixed(4); | |
| document.getElementById('chip-done').textContent = String(done); | |
| document.getElementById('chip-done').style.color = done ? 'var(--green)' : 'var(--muted)'; | |
| renderObs(data.observation || data); | |
| renderReward(reward, data); | |
| // Auto-switch to next expected action if provided | |
| const nextAction = (data.observation || {}).next_expected_action; | |
| if (nextAction && ACTION_SCHEMAS[nextAction] && state.task.actions.includes(nextAction)) { | |
| state.currentAction = nextAction; | |
| renderActionTabs(); | |
| renderActionFields(); | |
| } | |
| log('step', `[STEP] step=${state.step} action=${action.action_type} reward=${reward.toFixed(4)} done=${done}`); | |
| if (done) { | |
| log('end', `[END] task_id=${state.task.id} total_reward=${state.totalReward.toFixed(4)} steps=${state.step}`); | |
| btn.disabled = true; btn.textContent = 'β Done'; | |
| } | |
| } catch(e) { | |
| log('error', 'Step failed: ' + e.message); | |
| } finally { | |
| if (!state.done) { btn.disabled = false; btn.textContent = 'βΆ Step'; } | |
| } | |
| } | |
| // ═══════════════════════════════════════════════ | |
| // RENDER | |
| // ═══════════════════════════════════════════════ | |
| function renderObs(obs) { | |
| document.getElementById('obs-view').innerHTML = syntaxHighlight(JSON.stringify(obs, null, 2)); | |
| } | |
| function renderReward(reward, data) { | |
| const r = Math.max(0, Math.min(1, reward)); | |
| const color = r >= 0.7 ? 'var(--green)' : r >= 0.4 ? 'var(--amber)' : 'var(--red)'; | |
| document.getElementById('reward-num').textContent = reward.toFixed(4); | |
| document.getElementById('reward-num').style.color = color; | |
| document.getElementById('reward-bar').style.width = (r*100)+'%'; | |
| document.getElementById('reward-bar').style.background = r >= 0.7 ? 'linear-gradient(90deg,#16a34a,#22c55e)' : r >= 0.4 ? 'linear-gradient(90deg,#b45309,#f59e0b)' : 'linear-gradient(90deg,#991b1b,#ef4444)'; | |
| document.getElementById('reward-label').textContent = r >= 0.7 ? 'β Good' : r >= 0.4 ? 'β Partial' : r > 0 ? 'β Low' : 'β Zero'; | |
| const bd = document.getElementById('reward-breakdown'); | |
| const breakdown = data.reward_breakdown || data.breakdown || null; | |
| if (breakdown && typeof breakdown === 'object') { | |
| bd.innerHTML = '<div style="font-size:10px;font-weight:700;color:var(--muted);text-transform:uppercase;margin:8px 0 4px">Breakdown</div>'; | |
| Object.entries(breakdown).forEach(([k,v]) => { | |
| const pos = v >= 0; | |
| bd.innerHTML += `<div class="breakdown-item"><span>${k.replace(/_/g,' ')}</span><span class="breakdown-val ${pos?'pos':'neg'}">${pos?'+':''}${typeof v==='number'?v.toFixed(4):v}</span></div>`; | |
| }); | |
| } else bd.innerHTML = ''; | |
| const raw = document.getElementById('step-result-raw'); | |
| const filtered = {...data}; delete filtered.observation; | |
| raw.innerHTML = '<div style="font-size:10px;color:var(--muted);margin-top:6px;font-family:var(--mono);white-space:pre-wrap;max-height:120px;overflow-y:auto">' + syntaxHighlight(JSON.stringify(filtered, null, 2)) + '</div>'; | |
| } | |
| function syntaxHighlight(json) { | |
| return json | |
| .replace(/&/g,'&').replace(/</g,'<').replace(/>/g,'>') | |
| .replace(/("(\\u[a-zA-Z0-9]{4}|\\[^u]|[^\\"])*"(\s*:)?|\b(true|false|null)\b|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?)/g, m => { | |
| let cls = 'json-num'; | |
| if (/^"/.test(m)) cls = /:$/.test(m) ? 'json-key' : 'json-str'; | |
| else if (/true|false/.test(m)) cls = 'json-bool'; | |
| else if (/null/.test(m)) cls = 'json-null'; | |
| return `<span class="${cls}">${m}</span>`; | |
| }); | |
| } | |
| // ═══════════════════════════════════════════════ | |
| // LOG | |
| // ═══════════════════════════════════════════════ | |
| function log(type, msg) { | |
| const logEl = document.getElementById('step-log'); | |
| const line = document.createElement('div'); | |
| line.className = 'log-line'; | |
| const now = new Date(); | |
| const t = `${String(now.getHours()).padStart(2,'0')}:${String(now.getMinutes()).padStart(2,'0')}:${String(now.getSeconds()).padStart(2,'0')}`; | |
| const tagMap = {start:'START',step:'STEP',end:'END',error:'ERROR',info:'INFO'}; | |
| line.innerHTML = `<span class="log-time">${t}</span><span class="log-tag ${type}">[${tagMap[type]||type.toUpperCase()}]</span><span class="log-msg">${msg}</span>`; | |
| logEl.appendChild(line); | |
| logEl.scrollTop = logEl.scrollHeight; | |
| } | |
| function clearLog() { | |
| document.getElementById('step-log').innerHTML = ''; | |
| log('info', 'Log cleared.'); | |
| } | |
| // ═══════════════════════════════════════════════ | |
| // FULL INFERENCE | |
| // ═══════════════════════════════════════════════ | |
| async function runFullInference() { | |
| const btn = document.getElementById('inf-btn'); | |
| btn.disabled = true; btn.textContent = 'β³ Running...'; | |
| const prog = document.getElementById('inf-progress'); | |
| const scores = document.getElementById('inf-scores'); | |
| prog.style.display = 'flex'; prog.innerHTML = ''; | |
| scores.innerHTML = ''; | |
| const allTasks = ['sec_easy','sec_medium','sec_hard','dep_easy','dep_medium','dep_hard','cli_easy','cli_medium','cli_hard']; | |
| allTasks.forEach(t => { prog.innerHTML += `<span class="task-chip" id="chip-inf-${t}">${t}</span>`; }); | |
| log('info', 'Starting full inference via /inference...'); | |
| try { | |
| const res = await fetch('/inference', { method:'POST', headers:{'Content-Type':'application/json'}, body:'{}' }); | |
| const data = await res.json(); | |
| if (data.error) { log('error', 'Inference error: ' + data.error); return; } | |
| const final = data.final_scores || {}; | |
| allTasks.forEach(t => { | |
| const chip = document.getElementById('chip-inf-'+t); | |
| const sc = final[t]; | |
| if (sc !== undefined) { | |
| chip.classList.add(sc > 0.3 ? 'done' : 'fail'); | |
| chip.textContent = `${t}: ${typeof sc==='number'?sc.toFixed(3):sc}`; | |
| } else chip.classList.add('fail'); | |
| }); | |
| const avg = data.average_score || 0; | |
| scores.innerHTML = `<div style="padding:6px;background:var(--bg);border-radius:4px;border:1px solid var(--border)"><span style="font-size:10px;color:var(--muted)">Average: </span><strong style="color:var(--green)">${avg.toFixed ? avg.toFixed(4) : avg}</strong></div>`; | |
| log('end', `Inference done. Average: ${avg}`); | |
| } catch(e) { | |
| log('error', 'Inference failed: ' + e.message); | |
| } finally { | |
| btn.disabled = false; btn.textContent = 'βΆ Run All 9 Tasks'; | |
| } | |
| } | |
| // ═══════════════════════════════════════════════ | |
| // HEALTH CHECK — simple GET on '/' (an OK response counts as online) | |
| // ═══════════════════════════════════════════════ | |
| async function checkServerHealth() { | |
| try { | |
| const res = await fetch('/', { | |
| headers: { 'Accept': 'application/json' }, | |
| signal: AbortSignal.timeout(3000) | |
| }); | |
| if (res.ok) { | |
| document.getElementById('status-dot').className = 'logo-dot green'; | |
| document.getElementById('server-status').textContent = 'Server online'; | |
| document.getElementById('server-status').style.color = 'var(--green)'; | |
| } else throw new Error('not ok'); | |
| } catch(e) { | |
| document.getElementById('status-dot').className = 'logo-dot err'; | |
| document.getElementById('server-status').textContent = 'Server unreachable'; | |
| document.getElementById('server-status').style.color = 'var(--red)'; | |
| } | |
| } | |
| // Entry point of this script block: runs once the script has been parsed. | |
| // NOTE(review): init() is defined outside this excerpt — confirm what it sets up. | |
| init(); | |
| </script> | |
| </div><!-- end page-debug --> | |
| <!-- ── PAGE: BENCHMARK ── --> | |
| <!-- Benchmark page: API configuration form, run button, run history and tips | |
|      in the sidebar; results table, comparison chart and log in the main area. --> | |
| <div class="page" id="page-benchmark"> | |
|   <div class="bench-layout"> | |
|     <!-- Benchmark Sidebar --> | |
|     <div class="bench-sidebar"> | |
|       <div class="bench-card"> | |
|         <div class="bench-card-hdr">π API Configuration</div> | |
|         <div class="bench-card-body"> | |
|           <label style="font-size:10px;color:var(--muted);margin-bottom:6px;display:block">Quick Presets</label> | |
|           <div class="preset-row"> | |
|             <button type="button" class="preset-btn" onclick="applyPreset('groq')">⚡ Groq</button> | |
|             <button type="button" class="preset-btn" onclick="applyPreset('openrouter')">π OpenRouter</button> | |
|             <button type="button" class="preset-btn" onclick="applyPreset('huggingface')">🤗 HuggingFace</button> | |
|             <button type="button" class="preset-btn" onclick="applyPreset('custom')">βοΈ Custom</button> | |
|           </div> | |
|           <!-- Each label is tied to its input via for/id. --> | |
|           <div class="bench-field"> | |
|             <label for="bench-api-base">API Base URL</label> | |
|             <input type="text" id="bench-api-base" placeholder="https://api.groq.com/openai/v1"> | |
|           </div> | |
|           <div class="bench-field"> | |
|             <label for="bench-api-key">API Key</label> | |
|             <input type="password" id="bench-api-key" placeholder="sk-..." autocomplete="off" spellcheck="false"> | |
|           </div> | |
|           <div class="bench-field"> | |
|             <label for="bench-model-name">Model Display Name</label> | |
|             <input type="text" id="bench-model-name" placeholder="Llama-3.3-70B"> | |
|           </div> | |
|           <div class="bench-field"> | |
|             <label for="bench-model-id">Model ID</label> | |
|             <input type="text" id="bench-model-id" placeholder="llama-3.3-70b-versatile"> | |
|           </div> | |
|         </div> | |
|       </div> | |
|       <button type="button" class="run-btn" id="bench-run-btn" onclick="runBenchmark()"> | |
|         π Run Benchmark (9 Tasks) | |
|       </button> | |
|       <div class="bench-card" style="margin-top:12px"> | |
|         <div class="bench-card-hdr">π Run History | |
|           <button type="button" class="btn-ghost" style="margin-left:auto;font-size:9px;padding:2px 6px" onclick="clearResults()">Clear All</button> | |
|         </div> | |
|         <div class="bench-card-body" id="bench-history" style="max-height:200px;overflow-y:auto"> | |
|           <div style="color:var(--muted);font-size:11px;text-align:center;padding:12px">No runs yet. Configure a model above and run.</div> | |
|         </div> | |
|       </div> | |
|       <div class="bench-card"> | |
|         <div class="bench-card-hdr">ℹ️ Tips</div> | |
|         <div class="bench-card-body" style="font-size:11px;color:var(--muted);line-height:1.5"> | |
|           <p>• <strong>Groq</strong> — Fast, free tier, use llama-3.3-70b-versatile</p> | |
|           <p>• <strong>OpenRouter</strong> — Many models, free tier has rate limits</p> | |
|           <p>• <strong>HuggingFace</strong> — Use your HF token with router.huggingface.co/v1</p> | |
|           <p style="margin-top:6px;color:var(--amber)">⚠️ Free tier models may hit rate limits on 9 tasks</p> | |
|         </div> | |
|       </div> | |
|     </div> | |
|     <!-- Benchmark Main --> | |
|     <div class="bench-main"> | |
|       <!-- Results Table --> | |
|       <div class="results-area" id="bench-results"> | |
|         <div class="empty-state"> | |
|           <div class="icon">π</div> | |
|           <p>Run a benchmark to see results here. Configure your API key and model on the left, then click Run.</p> | |
|         </div> | |
|       </div> | |
|       <!-- Comparison Chart --> | |
|       <div class="chart-container" id="bench-chart" style="display:none"> | |
|         <div style="font-size:11px;font-weight:700;color:var(--muted);text-transform:uppercase;letter-spacing:.04em;margin-bottom:10px">Model Comparison — Average Score</div> | |
|         <div id="chart-bars"></div> | |
|       </div> | |
|       <!-- Log --> | |
|       <div class="bench-log" id="bench-log"> | |
|         <div style="color:var(--muted)">Benchmark logs will appear here...</div> | |
|       </div> | |
|     </div> | |
|   </div> | |
| </div><!-- end page-benchmark --> | |
| <script> | |
| // ══════════════════════════════════ | |
| // PAGE SWITCHING | |
| // ══════════════════════════════════ | |
| function switchPage(page) { | |
| document.querySelectorAll('.page').forEach(p => p.classList.remove('visible')); | |
| document.querySelectorAll('.page-tab').forEach(t => t.classList.remove('active')); | |
| document.getElementById('page-' + page).classList.add('visible'); | |
| document.getElementById('ptab-' + page).classList.add('active'); | |
| if (page === 'benchmark') loadBenchResults(); | |
| } | |
| // ══════════════════════════════════ | |
| // API PRESETS | |
| // ══════════════════════════════════ | |
| const PRESETS = { | |
| groq: { base: 'https://api.groq.com/openai/v1', models: ['llama-3.3-70b-versatile','mixtral-8x7b-32768','gemma2-9b-it'], default_name: 'Llama-3.3-70B', default_id: 'llama-3.3-70b-versatile' }, | |
| openrouter: { base: 'https://openrouter.ai/api/v1', models: ['nvidia/nemotron-3-super-120b-a12b:free','qwen/qwen3.6-plus:free','deepseek/deepseek-r1:free'], default_name: 'Nemotron-120B', default_id: 'nvidia/nemotron-3-super-120b-a12b:free' }, | |
| huggingface: { base: 'https://router.huggingface.co/v1', models: ['Qwen/Qwen2.5-72B-Instruct','meta-llama/Llama-3.1-70B-Instruct'], default_name: 'Qwen-2.5-72B', default_id: 'Qwen/Qwen2.5-72B-Instruct' }, | |
| custom: { base: '', models: [], default_name: '', default_id: '' }, | |
| }; | |
| function applyPreset(name) { | |
| document.querySelectorAll('.preset-btn').forEach(b => b.classList.remove('active')); | |
| event.target.classList.add('active'); | |
| const p = PRESETS[name]; | |
| document.getElementById('bench-api-base').value = p.base; | |
| document.getElementById('bench-model-name').value = p.default_name; | |
| document.getElementById('bench-model-id').value = p.default_id; | |
| if (name !== 'custom') document.getElementById('bench-api-key').focus(); | |
| } | |
| // ══════════════════════════════════ | |
| // RUN BENCHMARK | |
| // ══════════════════════════════════ | |
| let benchRunning = false; | |
| async function runBenchmark() { | |
| if (benchRunning) return; | |
| const apiBase = document.getElementById('bench-api-base').value.trim(); | |
| const apiKey = document.getElementById('bench-api-key').value.trim(); | |
| const modelName = document.getElementById('bench-model-name').value.trim() || 'Unknown'; | |
| const modelId = document.getElementById('bench-model-id').value.trim(); | |
| if (!apiBase || !apiKey || !modelId) { | |
| alert('Please fill in API Base URL, API Key, and Model ID'); | |
| return; | |
| } | |
| benchRunning = true; | |
| const btn = document.getElementById('bench-run-btn'); | |
| btn.disabled = true; | |
| btn.classList.add('running'); | |
| btn.innerHTML = 'β³ Running 9 tasks...'; | |
| const logEl = document.getElementById('bench-log'); | |
| logEl.innerHTML = ''; | |
| benchLog('info', `Starting benchmark: ${modelName} (${modelId})`); | |
| benchLog('info', `API: ${apiBase}`); | |
| benchLog('info', `Running 9 tasks... This may take 2-5 minutes.`); | |
| try { | |
| const res = await fetch('/benchmark/run', { | |
| method: 'POST', | |
| headers: {'Content-Type': 'application/json'}, | |
| body: JSON.stringify({ | |
| model_name: modelName, | |
| model_id: modelId, | |
| api_base: apiBase, | |
| api_key: apiKey, | |
| }) | |
| }); | |
| if (res.headers.get('content-type').includes('application/json')) { | |
| const data = await res.json(); | |
| if (data.error) benchLog('err', 'Error: ' + data.error); | |
| throw new Error('Benchmark failed to start'); | |
| } | |
| const reader = res.body.getReader(); | |
| const decoder = new TextDecoder(); | |
| let done = false; | |
| let buffer = ''; | |
| while (!done) { | |
| const { value, done: readerDone } = await reader.read(); | |
| done = readerDone; | |
| if (value) { | |
| buffer += decoder.decode(value, { stream: true }); | |
| let parts = buffer.split('\n\n'); | |
| buffer = parts.pop(); | |
| for (const part of parts) { | |
| if (part.startsWith('data: ')) { | |
| try { | |
| const event = JSON.parse(part.substring(6)); | |
| if (event.type === 'log') { | |
| benchLog(event.level, event.msg); | |
| } else if (event.type === 'task_done') { | |
| benchLog('info', `π― Task ${event.task_id} completed with score: ${event.score.toFixed(4)}`); | |
| } else if (event.type === 'done') { | |
| benchLog('ok', `β All tasks complete! Average: ${event.result.average}`); | |
| renderResults(); | |
| renderChart(); | |
| } | |
| } catch(e) {} | |
| } | |
| } | |
| } | |
| } | |
| } catch(e) { | |
| benchLog('err', 'Execution error: ' + e.message); | |
| } finally { | |
| benchRunning = false; | |
| btn.disabled = false; | |
| btn.classList.remove('running'); | |
| btn.innerHTML = 'π Run Benchmark (9 Tasks)'; | |
| } | |
| } | |
| function benchLog(type, msg) { | |
| const logEl = document.getElementById('bench-log'); | |
| const cls = type === 'err' ? 'log-err' : type === 'warn' ? 'log-warn' : type === 'ok' ? 'log-ok' : 'log-info'; | |
| const time = new Date().toLocaleTimeString('en-US',{hour12:false,hour:'2-digit',minute:'2-digit',second:'2-digit'}); | |
| logEl.innerHTML += `<div class="${cls}"><span style="color:var(--muted)">${time}</span> ${msg}</div>`; | |
| logEl.scrollTop = logEl.scrollHeight; | |
| } | |
| // ══════════════════════════════════ | |
| // RESULTS RENDERING | |
| // ══════════════════════════════════ | |
| const BENCH_TASKS = ['sec_easy','sec_medium','sec_hard','dep_easy','dep_medium','dep_hard','cli_easy','cli_medium','cli_hard']; | |
| const BENCH_COLORS = ['#4f8ef7','#a855f7','#22c55e','#f59e0b','#ef4444','#22d3ee','#f472b6','#84cc16','#fb923c']; | |
| async function loadBenchResults() { | |
| try { | |
| const res = await fetch('/benchmark/results'); | |
| const data = await res.json(); | |
| if (data.results && data.results.length > 0) { | |
| renderResults(data.results); | |
| renderChart(data.results); | |
| renderHistory(data.results); | |
| } | |
| } catch(e) {} | |
| } | |
| function renderResults(results) { | |
| if (!results) { | |
| fetch('/benchmark/results').then(r=>r.json()).then(d => { if(d.results) renderResults(d.results); }); | |
| return; | |
| } | |
| if (results.length === 0) return; | |
| const el = document.getElementById('bench-results'); | |
| let html = '<table class="results-table"><thead><tr><th>Model</th>'; | |
| BENCH_TASKS.forEach(t => html += `<th>${t.replace('_',' ').toUpperCase()}</th>`); | |
| html += '<th>AVG</th><th>Time</th></tr></thead><tbody>'; | |
| results.forEach((r, i) => { | |
| html += `<tr>`; | |
| html += `<td style="font-weight:700;color:${BENCH_COLORS[i % BENCH_COLORS.length]}">${r.model_name}</td>`; | |
| BENCH_TASKS.forEach(t => { | |
| const s = r.scores[t] || 0; | |
| const cls = s >= 0.8 ? 'score-high' : s >= 0.4 ? 'score-mid' : 'score-low'; | |
| html += `<td class="score-cell ${cls}">${s.toFixed(2)}</td>`; | |
| }); | |
| const avgCls = r.average >= 0.7 ? 'score-high' : r.average >= 0.4 ? 'score-mid' : 'score-low'; | |
| html += `<td class="score-cell avg-cell ${avgCls}">${r.average.toFixed(3)}</td>`; | |
| const ts = new Date(r.timestamp); | |
| html += `<td style="font-size:10px;color:var(--muted)">${ts.toLocaleTimeString()}</td>`; | |
| html += '</tr>'; | |
| }); | |
| html += '</tbody></table>'; | |
| el.innerHTML = html; | |
| } | |
| function renderChart(results) { | |
| if (!results) { | |
| fetch('/benchmark/results').then(r=>r.json()).then(d => { if(d.results) renderChart(d.results); }); | |
| return; | |
| } | |
| if (results.length === 0) return; | |
| const container = document.getElementById('bench-chart'); | |
| container.style.display = 'block'; | |
| const bars = document.getElementById('chart-bars'); | |
| let html = ''; | |
| results.forEach((r, i) => { | |
| const pct = Math.round(r.average * 100); | |
| const color = BENCH_COLORS[i % BENCH_COLORS.length]; | |
| const gradient = `linear-gradient(90deg, ${color}88, ${color})`; | |
| html += `<div class="chart-bar-row"> | |
| <div class="chart-label">${r.model_name}</div> | |
| <div class="chart-bar-bg"> | |
| <div class="chart-bar-fill" style="width:${pct}%;background:${gradient}">${r.average.toFixed(3)}</div> | |
| </div> | |
| </div>`; | |
| }); | |
| bars.innerHTML = html; | |
| } | |
| function renderHistory(results) { | |
| const el = document.getElementById('bench-history'); | |
| if (!results || results.length === 0) { | |
| el.innerHTML = '<div style="color:var(--muted);font-size:11px;text-align:center;padding:12px">No runs yet.</div>'; | |
| return; | |
| } | |
| let html = ''; | |
| results.forEach((r, i) => { | |
| const avgCls = r.average >= 0.7 ? 'score-high' : r.average >= 0.4 ? 'score-mid' : 'score-low'; | |
| const ts = new Date(r.timestamp); | |
| html += `<div style="display:flex;align-items:center;gap:8px;padding:6px 0;border-bottom:1px solid var(--border);font-size:11px"> | |
| <span style="color:${BENCH_COLORS[i % BENCH_COLORS.length]};font-weight:700">${r.model_name}</span> | |
| <span class="score-cell ${avgCls}" style="margin-left:auto">${r.average.toFixed(3)}</span> | |
| <span style="color:var(--muted);font-size:9px">${ts.toLocaleTimeString()}</span> | |
| </div>`; | |
| }); | |
| el.innerHTML = html; | |
| } | |
| async function clearResults() { | |
| if (!confirm('Clear all benchmark results?')) return; | |
| await fetch('/benchmark/clear', {method:'POST'}); | |
| document.getElementById('bench-results').innerHTML = '<div class="empty-state"><div class="icon">π</div><p>No results. Run a benchmark to see data.</p></div>'; | |
| document.getElementById('bench-chart').style.display = 'none'; | |
| document.getElementById('bench-history').innerHTML = '<div style="color:var(--muted);font-size:11px;text-align:center;padding:12px">No runs yet.</div>'; | |
| } | |
| </script> | |
| </body> | |
| </html> | |