Fix custom prompt UI and token optimizer flow
Browse files- __pycache__/app.cpython-314.pyc +0 -0
- __pycache__/streamlit_app.cpython-314.pyc +0 -0
- app.py +618 -220
- env/__pycache__/environment.cpython-314.pyc +0 -0
- env/environment.py +30 -3
- streamlit_app.py +123 -33
__pycache__/app.cpython-314.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-314.pyc and b/__pycache__/app.cpython-314.pyc differ
|
|
|
__pycache__/streamlit_app.cpython-314.pyc
CHANGED
|
Binary files a/__pycache__/streamlit_app.cpython-314.pyc and b/__pycache__/streamlit_app.cpython-314.pyc differ
|
|
|
app.py
CHANGED
|
@@ -20,6 +20,9 @@ from env.tasks import ALL_TASKS, TASKS_BY_NAME
|
|
| 20 |
|
| 21 |
class ResetRequest(BaseModel):
|
| 22 |
task_name: Literal["single_domain_qa", "cross_domain_synthesis", "adversarial_compression"]
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
@asynccontextmanager
|
|
@@ -49,251 +52,478 @@ app.add_middleware(
|
|
| 49 |
UI_HTML = """
|
| 50 |
<!doctype html>
|
| 51 |
<html lang="en">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
<head>
|
| 53 |
<meta charset="utf-8" />
|
| 54 |
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 55 |
<title>rag-context-optimizer</title>
|
| 56 |
<style>
|
| 57 |
:root {
|
| 58 |
-
--bg: #
|
| 59 |
--panel: #ffffff;
|
| 60 |
-
--ink: #
|
| 61 |
-
--muted: #
|
| 62 |
-
--line: #
|
| 63 |
--accent: #0f766e;
|
| 64 |
-
--accent-
|
| 65 |
--warn: #b45309;
|
|
|
|
| 66 |
}
|
| 67 |
* { box-sizing: border-box; }
|
| 68 |
body {
|
| 69 |
margin: 0;
|
| 70 |
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
|
|
|
| 71 |
color: var(--ink);
|
| 72 |
-
background:
|
| 73 |
-
radial-gradient(circle at top left, #d9f99d 0, transparent 22%),
|
| 74 |
-
radial-gradient(circle at top right, #bfdbfe 0, transparent 25%),
|
| 75 |
-
linear-gradient(180deg, #f8fbff 0%, var(--bg) 100%);
|
| 76 |
-
}
|
| 77 |
-
.shell {
|
| 78 |
-
max-width: 1240px;
|
| 79 |
-
margin: 0 auto;
|
| 80 |
-
padding: 28px 20px 48px;
|
| 81 |
-
}
|
| 82 |
-
.hero {
|
| 83 |
-
display: grid;
|
| 84 |
-
grid-template-columns: 1.2fr 0.8fr;
|
| 85 |
-
gap: 18px;
|
| 86 |
-
margin-bottom: 18px;
|
| 87 |
}
|
|
|
|
|
|
|
|
|
|
| 88 |
.card {
|
| 89 |
background: var(--panel);
|
| 90 |
border: 1px solid var(--line);
|
| 91 |
-
border-radius:
|
| 92 |
padding: 18px;
|
| 93 |
-
box-shadow: 0 10px
|
| 94 |
}
|
| 95 |
h1, h2, h3, p { margin-top: 0; }
|
| 96 |
-
|
| 97 |
-
.
|
| 98 |
-
.stat
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
}
|
| 104 |
-
.stat {
|
| 105 |
-
border: 1px solid var(--line);
|
| 106 |
-
border-radius: 16px;
|
| 107 |
-
padding: 12px;
|
| 108 |
-
background: #fcfdff;
|
| 109 |
-
}
|
| 110 |
-
.stat .label { font-size: 0.82rem; color: var(--muted); }
|
| 111 |
-
.stat .value { font-size: 1.4rem; font-weight: 700; margin-top: 6px; }
|
| 112 |
-
.layout {
|
| 113 |
-
display: grid;
|
| 114 |
-
grid-template-columns: 320px 1fr;
|
| 115 |
-
gap: 18px;
|
| 116 |
-
}
|
| 117 |
-
.task-list { display: grid; gap: 10px; }
|
| 118 |
.task-btn {
|
| 119 |
width: 100%;
|
| 120 |
text-align: left;
|
| 121 |
border: 1px solid var(--line);
|
|
|
|
|
|
|
| 122 |
background: #fff;
|
| 123 |
-
border-radius: 14px;
|
| 124 |
-
padding: 12px 14px;
|
| 125 |
cursor: pointer;
|
| 126 |
-
|
| 127 |
}
|
| 128 |
.task-btn:hover, .task-btn.active {
|
| 129 |
-
border-color: var(--accent-
|
| 130 |
-
|
| 131 |
-
box-shadow: 0 8px 18px rgba(14, 165, 233, 0.12);
|
| 132 |
-
}
|
| 133 |
-
.task-btn .name { font-weight: 700; }
|
| 134 |
-
.task-btn .meta { color: var(--muted); font-size: 0.85rem; margin-top: 4px; }
|
| 135 |
-
.toolbar {
|
| 136 |
-
display: flex;
|
| 137 |
-
flex-wrap: wrap;
|
| 138 |
-
gap: 10px;
|
| 139 |
-
margin-bottom: 14px;
|
| 140 |
}
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
font: inherit;
|
| 145 |
-
}
|
| 146 |
-
button.action {
|
| 147 |
-
background: linear-gradient(135deg, var(--accent), var(--accent-2));
|
| 148 |
-
color: white;
|
| 149 |
-
padding: 10px 14px;
|
| 150 |
cursor: pointer;
|
| 151 |
-
|
|
|
|
| 152 |
font-weight: 700;
|
| 153 |
}
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
.query {
|
| 160 |
-
font-size: 1.05rem;
|
| 161 |
-
font-weight: 600;
|
| 162 |
padding: 14px;
|
| 163 |
-
border-radius: 16px;
|
| 164 |
-
background: #f8fafc;
|
| 165 |
-
border: 1px solid var(--line);
|
| 166 |
-
margin-bottom: 14px;
|
| 167 |
-
}
|
| 168 |
-
.grid {
|
| 169 |
-
display: grid;
|
| 170 |
-
grid-template-columns: repeat(auto-fill, minmax(240px, 1fr));
|
| 171 |
-
gap: 12px;
|
| 172 |
-
}
|
| 173 |
-
.chunk {
|
| 174 |
border: 1px solid var(--line);
|
| 175 |
border-radius: 16px;
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
.chunk.selected {
|
| 180 |
-
border-color: var(--accent);
|
| 181 |
-
background: #f0fdfa;
|
| 182 |
}
|
| 183 |
-
.
|
|
|
|
|
|
|
| 184 |
.pill {
|
| 185 |
display: inline-block;
|
|
|
|
|
|
|
| 186 |
border-radius: 999px;
|
| 187 |
-
|
| 188 |
-
background: #eef6ff;
|
| 189 |
color: #1d4ed8;
|
| 190 |
font-size: 0.75rem;
|
| 191 |
-
margin-right: 6px;
|
| 192 |
-
margin-bottom: 6px;
|
| 193 |
-
}
|
| 194 |
-
.row {
|
| 195 |
-
display: flex;
|
| 196 |
-
gap: 8px;
|
| 197 |
-
align-items: center;
|
| 198 |
-
flex-wrap: wrap;
|
| 199 |
-
margin-top: 10px;
|
| 200 |
-
}
|
| 201 |
-
.answer-box {
|
| 202 |
-
width: 100%;
|
| 203 |
-
min-height: 110px;
|
| 204 |
-
padding: 12px;
|
| 205 |
-
resize: vertical;
|
| 206 |
-
}
|
| 207 |
-
.panel-grid {
|
| 208 |
-
display: grid;
|
| 209 |
-
grid-template-columns: 1fr 1fr;
|
| 210 |
-
gap: 12px;
|
| 211 |
-
margin-top: 14px;
|
| 212 |
}
|
|
|
|
| 213 |
pre {
|
| 214 |
margin: 0;
|
| 215 |
white-space: pre-wrap;
|
| 216 |
word-break: break-word;
|
| 217 |
-
font-size: 0.
|
|
|
|
| 218 |
background: #0f172a;
|
| 219 |
color: #e2e8f0;
|
| 220 |
padding: 14px;
|
| 221 |
-
border-radius:
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
.feedback { color: var(--warn); font-weight: 600; min-height: 24px; }
|
| 225 |
-
@media (max-width: 980px) {
|
| 226 |
-
.hero, .layout, .panel-grid { grid-template-columns: 1fr; }
|
| 227 |
-
.stat-grid { grid-template-columns: 1fr; }
|
| 228 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
</style>
|
| 230 |
</head>
|
| 231 |
<body>
|
| 232 |
-
<div class="
|
| 233 |
-
<
|
| 234 |
-
<
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
</
|
| 241 |
-
<div class="stat
|
| 242 |
-
|
| 243 |
-
<div class="stat"><div class="label">Tokens Used</div><div class="value" id="usedStat">-</div></div>
|
| 244 |
-
<div class="stat"><div class="label">Step</div><div class="value" id="stepStat">-</div></div>
|
| 245 |
-
</div>
|
| 246 |
</div>
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
<
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
</div>
|
| 257 |
-
</div>
|
| 258 |
-
</section>
|
| 259 |
-
|
| 260 |
-
<section class="layout">
|
| 261 |
-
<aside class="card">
|
| 262 |
-
<h3>Tasks</h3>
|
| 263 |
-
<div id="taskList" class="task-list"></div>
|
| 264 |
</aside>
|
| 265 |
|
| 266 |
-
<
|
| 267 |
-
<div class="
|
| 268 |
-
<
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
</div>
|
| 271 |
-
<div id="queryBox" class="query">Choose a task to begin.</div>
|
| 272 |
-
<div class="feedback" id="feedbackBox"></div>
|
| 273 |
|
| 274 |
-
<
|
| 275 |
-
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
-
<
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
|
|
|
|
|
|
| 281 |
</div>
|
| 282 |
|
| 283 |
-
<div class="
|
| 284 |
-
<div>
|
| 285 |
<h3>Observation</h3>
|
| 286 |
<pre id="observationPanel">{}</pre>
|
| 287 |
</div>
|
| 288 |
-
<div>
|
| 289 |
<h3>State</h3>
|
| 290 |
<pre id="statePanel">{}</pre>
|
| 291 |
</div>
|
| 292 |
</div>
|
| 293 |
-
</
|
| 294 |
-
</
|
| 295 |
</div>
|
| 296 |
-
|
| 297 |
<script>
|
| 298 |
const taskList = document.getElementById("taskList");
|
| 299 |
const chunkGrid = document.getElementById("chunkGrid");
|
|
@@ -304,121 +534,207 @@ UI_HTML = """
|
|
| 304 |
const budgetStat = document.getElementById("budgetStat");
|
| 305 |
const usedStat = document.getElementById("usedStat");
|
| 306 |
const stepStat = document.getElementById("stepStat");
|
|
|
|
| 307 |
const answerInput = document.getElementById("answerInput");
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
let selectedTask = "single_domain_qa";
|
| 309 |
let currentObservation = null;
|
|
|
|
|
|
|
| 310 |
|
| 311 |
async function fetchJson(url, options = {}) {
|
| 312 |
const response = await fetch(url, {
|
| 313 |
headers: { "Content-Type": "application/json" },
|
| 314 |
-
...options
|
| 315 |
});
|
| 316 |
const body = await response.json();
|
| 317 |
-
if (!response.ok)
|
| 318 |
-
throw new Error(body.detail || JSON.stringify(body));
|
| 319 |
-
}
|
| 320 |
return body;
|
| 321 |
}
|
| 322 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
function renderTasks(tasks) {
|
| 324 |
taskList.innerHTML = "";
|
| 325 |
-
tasks.forEach(task => {
|
| 326 |
const btn = document.createElement("button");
|
|
|
|
| 327 |
btn.className = "task-btn" + (task.name === selectedTask ? " active" : "");
|
| 328 |
-
btn.innerHTML = `
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
selectedTask = task.name;
|
| 331 |
renderTasks(tasks);
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
| 333 |
taskList.appendChild(btn);
|
| 334 |
});
|
| 335 |
}
|
| 336 |
|
| 337 |
function renderObservation(observation) {
|
| 338 |
currentObservation = observation;
|
| 339 |
-
queryBox.textContent = observation.query;
|
| 340 |
budgetStat.textContent = observation.token_budget;
|
| 341 |
usedStat.textContent = observation.total_tokens_used;
|
| 342 |
stepStat.textContent = observation.step_number;
|
| 343 |
-
|
|
|
|
| 344 |
observationPanel.textContent = JSON.stringify(observation, null, 2);
|
| 345 |
|
| 346 |
const selected = new Set(observation.selected_chunks || []);
|
| 347 |
chunkGrid.innerHTML = "";
|
| 348 |
-
observation.available_chunks.forEach(chunk => {
|
| 349 |
const card = document.createElement("div");
|
| 350 |
-
card.className = "chunk" + (selected.has(chunk.chunk_id) ? " selected" : "");
|
| 351 |
-
const
|
| 352 |
-
const selectAction = selected.has(chunk.chunk_id)
|
| 353 |
-
? `<button class="action secondary" data-action="deselect" data-id="${chunk.chunk_id}">Deselect</button>`
|
| 354 |
-
: `<button class="action secondary" data-action="select" data-id="${chunk.chunk_id}">Select</button>`;
|
| 355 |
card.innerHTML = `
|
| 356 |
<h4>${chunk.chunk_id}</h4>
|
| 357 |
-
<div
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
</div>
|
| 363 |
`;
|
| 364 |
chunkGrid.appendChild(card);
|
| 365 |
});
|
| 366 |
|
| 367 |
-
chunkGrid.querySelectorAll("button[data-action]").forEach(
|
| 368 |
-
|
| 369 |
-
const
|
| 370 |
-
const chunkId =
|
| 371 |
-
if (
|
| 372 |
-
await step({ action_type:
|
| 373 |
-
} else
|
| 374 |
-
await step({ action_type:
|
| 375 |
-
} else if (action === "compress") {
|
| 376 |
-
await step({ action_type: "compress_chunk", chunk_id: chunkId, compression_ratio: 0.5 });
|
| 377 |
}
|
| 378 |
});
|
| 379 |
});
|
| 380 |
}
|
| 381 |
|
| 382 |
async function refreshState() {
|
| 383 |
-
|
| 384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
}
|
| 386 |
|
| 387 |
async function resetTask() {
|
| 388 |
-
const
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
}
|
| 395 |
|
| 396 |
async function step(payload) {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
try {
|
| 398 |
const body = await fetchJson("/step", {
|
| 399 |
method: "POST",
|
| 400 |
-
body: JSON.stringify(payload)
|
| 401 |
});
|
| 402 |
renderObservation(body.observation);
|
|
|
|
| 403 |
if (body.info && body.info.grader_breakdown) {
|
| 404 |
-
feedbackBox.textContent =
|
| 405 |
}
|
| 406 |
await refreshState();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
} catch (error) {
|
| 408 |
feedbackBox.textContent = error.message;
|
| 409 |
}
|
| 410 |
}
|
| 411 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 412 |
document.getElementById("resetBtn").addEventListener("click", resetTask);
|
|
|
|
|
|
|
| 413 |
document.getElementById("refreshBtn").addEventListener("click", refreshState);
|
| 414 |
document.getElementById("submitBtn").addEventListener("click", async () => {
|
| 415 |
-
|
|
|
|
| 416 |
});
|
| 417 |
|
| 418 |
(async function init() {
|
| 419 |
-
|
| 420 |
-
renderTasks(
|
| 421 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 422 |
})();
|
| 423 |
</script>
|
| 424 |
</body>
|
|
@@ -436,7 +752,7 @@ async def log_requests(request: Request, call_next):
|
|
| 436 |
|
| 437 |
@app.get("/", response_class=HTMLResponse)
|
| 438 |
async def home_page():
|
| 439 |
-
return HTMLResponse(
|
| 440 |
|
| 441 |
|
| 442 |
def _serialize_observation(observation: Any) -> dict[str, Any]:
|
|
@@ -475,11 +791,83 @@ def _is_bad_action_event(event: str | None) -> bool:
|
|
| 475 |
}
|
| 476 |
|
| 477 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
@app.post("/reset")
|
| 479 |
async def reset_endpoint(payload: ResetRequest):
|
| 480 |
if payload.task_name not in TASKS_BY_NAME:
|
| 481 |
raise HTTPException(status_code=400, detail="Unknown task_name.")
|
| 482 |
-
env = RagContextOptimizerEnv(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
app.state.env = env
|
| 484 |
result = await env.reset()
|
| 485 |
return _serialize_step_result(result, reset=True)
|
|
@@ -519,11 +907,21 @@ async def tasks_endpoint():
|
|
| 519 |
"description": task.description,
|
| 520 |
"difficulty": task.difficulty,
|
| 521 |
"token_budget": task.token_budget,
|
|
|
|
|
|
|
| 522 |
}
|
| 523 |
for task in ALL_TASKS
|
| 524 |
]
|
| 525 |
|
| 526 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
if __name__ == "__main__":
|
| 528 |
import uvicorn
|
| 529 |
|
|
|
|
| 20 |
|
| 21 |
class ResetRequest(BaseModel):
|
| 22 |
task_name: Literal["single_domain_qa", "cross_domain_synthesis", "adversarial_compression"]
|
| 23 |
+
custom_query: str | None = None
|
| 24 |
+
token_budget: int | None = None
|
| 25 |
+
max_steps: int | None = None
|
| 26 |
|
| 27 |
|
| 28 |
@asynccontextmanager
|
|
|
|
| 52 |
UI_HTML = """
|
| 53 |
<!doctype html>
|
| 54 |
<html lang="en">
|
| 55 |
+
<head>
|
| 56 |
+
<meta charset="utf-8" />
|
| 57 |
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 58 |
+
<title>rag-context-optimizer</title>
|
| 59 |
+
<style>
|
| 60 |
+
:root { --bg:#f5f7fb; --panel:#fff; --ink:#132238; --muted:#64748b; --line:#dbe4f0; --accent:#0f766e; --accent2:#0284c7; }
|
| 61 |
+
* { box-sizing:border-box; }
|
| 62 |
+
body { margin:0; font-family:ui-sans-serif,system-ui,Segoe UI,sans-serif; color:var(--ink); background:linear-gradient(180deg,#f8fbff,var(--bg)); }
|
| 63 |
+
.wrap { max-width:1280px; margin:0 auto; padding:24px; }
|
| 64 |
+
.hero, .main, .panels { display:grid; gap:16px; }
|
| 65 |
+
.hero { grid-template-columns:1.2fr 0.8fr; margin-bottom:16px; }
|
| 66 |
+
.main { grid-template-columns:320px 1fr; }
|
| 67 |
+
.panels { grid-template-columns:1fr 1fr; margin-top:16px; }
|
| 68 |
+
.card { background:var(--panel); border:1px solid var(--line); border-radius:20px; padding:18px; box-shadow:0 10px 24px rgba(15,23,42,.05); }
|
| 69 |
+
h1,h2,h3,p { margin-top:0; }
|
| 70 |
+
.muted { color:var(--muted); line-height:1.5; }
|
| 71 |
+
.stats { display:grid; grid-template-columns:repeat(3,1fr); gap:12px; margin-top:14px; }
|
| 72 |
+
.stat { border:1px solid var(--line); border-radius:16px; padding:12px; }
|
| 73 |
+
.stat .k { font-size:.82rem; color:var(--muted); }
|
| 74 |
+
.stat .v { font-size:1.35rem; font-weight:700; margin-top:6px; }
|
| 75 |
+
.task-list { display:grid; gap:10px; }
|
| 76 |
+
.task-btn { width:100%; text-align:left; border:1px solid var(--line); border-radius:14px; padding:12px; background:#fff; cursor:pointer; }
|
| 77 |
+
.task-btn.active, .task-btn:hover { border-color:var(--accent2); box-shadow:0 6px 14px rgba(2,132,199,.12); }
|
| 78 |
+
.toolbar, .row { display:flex; gap:10px; flex-wrap:wrap; align-items:center; }
|
| 79 |
+
.toolbar { margin-bottom:12px; }
|
| 80 |
+
button, input, textarea { font:inherit; border-radius:12px; }
|
| 81 |
+
button { border:none; padding:10px 14px; cursor:pointer; font-weight:700; }
|
| 82 |
+
.primary { background:linear-gradient(135deg,var(--accent),var(--accent2)); color:#fff; }
|
| 83 |
+
.secondary { background:#fff; color:var(--ink); border:1px solid var(--line); }
|
| 84 |
+
input, textarea { width:100%; border:1px solid var(--line); padding:12px; }
|
| 85 |
+
textarea { min-height:96px; resize:vertical; }
|
| 86 |
+
.query { padding:14px; background:#f8fafc; border:1px solid var(--line); border-radius:16px; font-weight:600; margin-bottom:12px; }
|
| 87 |
+
.feedback { min-height:22px; color:#b45309; font-weight:600; }
|
| 88 |
+
.chunks { display:grid; grid-template-columns:repeat(auto-fill,minmax(250px,1fr)); gap:12px; }
|
| 89 |
+
.chunk { border:1px solid var(--line); border-radius:16px; padding:12px; background:#fff; }
|
| 90 |
+
.chunk.selected { border-color:var(--accent); background:#f0fdfa; }
|
| 91 |
+
.pill { display:inline-block; margin:0 6px 6px 0; padding:3px 8px; border-radius:999px; background:#eef6ff; color:#1d4ed8; font-size:.75rem; }
|
| 92 |
+
details { margin-top:16px; }
|
| 93 |
+
pre { white-space:pre-wrap; word-break:break-word; font-size:.86rem; background:#0f172a; color:#e2e8f0; padding:14px; border-radius:14px; overflow:auto; }
|
| 94 |
+
.small-input { width:160px; }
|
| 95 |
+
@media (max-width:980px) { .hero,.main,.panels,.stats { grid-template-columns:1fr; } .small-input { width:100%; } }
|
| 96 |
+
</style>
|
| 97 |
+
</head>
|
| 98 |
+
<body>
|
| 99 |
+
<div class="wrap">
|
| 100 |
+
<div class="hero">
|
| 101 |
+
<div class="card">
|
| 102 |
+
<h1>RAG Context Optimizer</h1>
|
| 103 |
+
<p class="muted">Use any prompt you want. The UI can reset a session with your custom query, token budget, and step limit, then help minimize token usage by selecting only high-value chunks and compressing heavy evidence before answering.</p>
|
| 104 |
+
<div class="stats">
|
| 105 |
+
<div class="stat"><div class="k">Token Budget</div><div class="v" id="budgetStat">-</div></div>
|
| 106 |
+
<div class="stat"><div class="k">Tokens Used</div><div class="v" id="usedStat">-</div></div>
|
| 107 |
+
<div class="stat"><div class="k">Step</div><div class="v" id="stepStat">-</div></div>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
+
<div class="card">
|
| 111 |
+
<h3>How To Use It</h3>
|
| 112 |
+
<p class="muted">Pick a task as a corpus shape, then overwrite the prompt with anything you want. Use Auto Optimize to choose chunks by relevance-per-token, or run manually. The API endpoints remain available at <code>/reset</code>, <code>/step</code>, and <code>/state</code>.</p>
|
| 113 |
+
<div class="row">
|
| 114 |
+
<a href="/docs" target="_blank"><button class="secondary" type="button">API Docs</button></a>
|
| 115 |
+
<a href="/health" target="_blank"><button class="secondary" type="button">Health</button></a>
|
| 116 |
+
</div>
|
| 117 |
+
</div>
|
| 118 |
+
</div>
|
| 119 |
+
|
| 120 |
+
<div class="main">
|
| 121 |
+
<aside class="card">
|
| 122 |
+
<h3>Task Presets</h3>
|
| 123 |
+
<div id="taskList" class="task-list"></div>
|
| 124 |
+
</aside>
|
| 125 |
+
|
| 126 |
+
<section class="card">
|
| 127 |
+
<div class="toolbar">
|
| 128 |
+
<button id="resetBtn" class="primary" type="button">Start / Reset</button>
|
| 129 |
+
<button id="autoStepBtn" class="secondary" type="button">Auto Optimize Step</button>
|
| 130 |
+
<button id="autoRunBtn" class="secondary" type="button">Auto Run</button>
|
| 131 |
+
<button id="refreshBtn" class="secondary" type="button">Refresh State</button>
|
| 132 |
+
</div>
|
| 133 |
+
<div class="row">
|
| 134 |
+
<input id="tokenBudgetInput" class="small-input" type="number" min="50" step="10" placeholder="Token budget" />
|
| 135 |
+
<input id="maxStepsInput" class="small-input" type="number" min="1" step="1" placeholder="Max steps" />
|
| 136 |
+
</div>
|
| 137 |
+
<div style="margin-top:12px;">
|
| 138 |
+
<label for="customQueryInput"><strong>Prompt / Query</strong></label>
|
| 139 |
+
<textarea id="customQueryInput" placeholder="Enter any prompt you want to optimize for minimal token usage."></textarea>
|
| 140 |
+
</div>
|
| 141 |
+
<div id="queryBox" class="query">Set a prompt and press Start / Reset.</div>
|
| 142 |
+
<div id="feedbackBox" class="feedback"></div>
|
| 143 |
+
|
| 144 |
+
<h3>Available Chunks</h3>
|
| 145 |
+
<div id="chunkGrid" class="chunks"></div>
|
| 146 |
+
|
| 147 |
+
<h3 style="margin-top:18px;">Answer</h3>
|
| 148 |
+
<textarea id="answerInput" placeholder="Write your final answer here."></textarea>
|
| 149 |
+
<div class="row" style="margin-top:10px;">
|
| 150 |
+
<button id="submitBtn" class="primary" type="button">Submit Answer</button>
|
| 151 |
+
</div>
|
| 152 |
+
|
| 153 |
+
<div class="panels">
|
| 154 |
+
<details class="card" open>
|
| 155 |
+
<summary><strong>Observation</strong></summary>
|
| 156 |
+
<pre id="observationPanel">{}</pre>
|
| 157 |
+
</details>
|
| 158 |
+
<details class="card">
|
| 159 |
+
<summary><strong>State</strong></summary>
|
| 160 |
+
<pre id="statePanel">{}</pre>
|
| 161 |
+
</details>
|
| 162 |
+
</div>
|
| 163 |
+
</section>
|
| 164 |
+
</div>
|
| 165 |
+
</div>
|
| 166 |
+
<script>
|
| 167 |
+
const taskList = document.getElementById("taskList");
|
| 168 |
+
const chunkGrid = document.getElementById("chunkGrid");
|
| 169 |
+
const queryBox = document.getElementById("queryBox");
|
| 170 |
+
const observationPanel = document.getElementById("observationPanel");
|
| 171 |
+
const statePanel = document.getElementById("statePanel");
|
| 172 |
+
const feedbackBox = document.getElementById("feedbackBox");
|
| 173 |
+
const budgetStat = document.getElementById("budgetStat");
|
| 174 |
+
const usedStat = document.getElementById("usedStat");
|
| 175 |
+
const stepStat = document.getElementById("stepStat");
|
| 176 |
+
const answerInput = document.getElementById("answerInput");
|
| 177 |
+
const customQueryInput = document.getElementById("customQueryInput");
|
| 178 |
+
const tokenBudgetInput = document.getElementById("tokenBudgetInput");
|
| 179 |
+
const maxStepsInput = document.getElementById("maxStepsInput");
|
| 180 |
+
let selectedTask = "single_domain_qa";
|
| 181 |
+
let currentObservation = null;
|
| 182 |
+
let allTasks = [];
|
| 183 |
+
|
| 184 |
+
async function fetchJson(url, options = {}) {
|
| 185 |
+
const response = await fetch(url, { headers: { "Content-Type": "application/json" }, ...options });
|
| 186 |
+
const body = await response.json();
|
| 187 |
+
if (!response.ok) throw new Error(body.detail || JSON.stringify(body));
|
| 188 |
+
return body;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
function syncInputsFromTask() {
|
| 192 |
+
const task = allTasks.find(item => item.name === selectedTask);
|
| 193 |
+
if (!task) return;
|
| 194 |
+
if (!customQueryInput.dataset.userEdited || !customQueryInput.value.trim()) customQueryInput.value = task.query;
|
| 195 |
+
tokenBudgetInput.value = task.token_budget;
|
| 196 |
+
maxStepsInput.value = task.max_steps;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
function renderTasks(tasks) {
|
| 200 |
+
taskList.innerHTML = "";
|
| 201 |
+
tasks.forEach(task => {
|
| 202 |
+
const btn = document.createElement("button");
|
| 203 |
+
btn.className = "task-btn" + (task.name === selectedTask ? " active" : "");
|
| 204 |
+
btn.innerHTML = `<div><strong>${task.name}</strong></div><div class="muted">${task.difficulty} · budget ${task.token_budget} · max steps ${task.max_steps}</div><div class="muted">${task.description}</div>`;
|
| 205 |
+
btn.onclick = () => {
|
| 206 |
+
selectedTask = task.name;
|
| 207 |
+
customQueryInput.dataset.userEdited = "";
|
| 208 |
+
renderTasks(tasks);
|
| 209 |
+
syncInputsFromTask();
|
| 210 |
+
};
|
| 211 |
+
taskList.appendChild(btn);
|
| 212 |
+
});
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
function renderObservation(observation) {
|
| 216 |
+
currentObservation = observation;
|
| 217 |
+
queryBox.textContent = observation.query;
|
| 218 |
+
budgetStat.textContent = observation.token_budget;
|
| 219 |
+
usedStat.textContent = observation.total_tokens_used;
|
| 220 |
+
stepStat.textContent = observation.step_number;
|
| 221 |
+
feedbackBox.textContent = observation.last_action_feedback || "";
|
| 222 |
+
observationPanel.textContent = JSON.stringify(observation, null, 2);
|
| 223 |
+
const selected = new Set(observation.selected_chunks || []);
|
| 224 |
+
chunkGrid.innerHTML = "";
|
| 225 |
+
observation.available_chunks.forEach(chunk => {
|
| 226 |
+
const card = document.createElement("div");
|
| 227 |
+
card.className = "chunk" + (selected.has(chunk.chunk_id) ? " selected" : "");
|
| 228 |
+
card.innerHTML = `
|
| 229 |
+
<h4>${chunk.chunk_id}</h4>
|
| 230 |
+
<div class="row"><span class="pill">${chunk.domain}</span><span class="pill">${chunk.tokens} tokens</span></div>
|
| 231 |
+
<div>${chunk.keywords.map(keyword => `<span class="pill">${keyword}</span>`).join("")}</div>
|
| 232 |
+
<div class="row" style="margin-top:10px;">
|
| 233 |
+
${selected.has(chunk.chunk_id)
|
| 234 |
+
? `<button class="secondary" data-action="deselect" data-id="${chunk.chunk_id}" type="button">Deselect</button>`
|
| 235 |
+
: `<button class="secondary" data-action="select" data-id="${chunk.chunk_id}" type="button">Select</button>`}
|
| 236 |
+
<button class="secondary" data-action="compress" data-id="${chunk.chunk_id}" type="button">Compress 50%</button>
|
| 237 |
+
</div>
|
| 238 |
+
`;
|
| 239 |
+
chunkGrid.appendChild(card);
|
| 240 |
+
});
|
| 241 |
+
chunkGrid.querySelectorAll("button[data-action]").forEach(btn => {
|
| 242 |
+
btn.addEventListener("click", async () => {
|
| 243 |
+
const action = btn.dataset.action;
|
| 244 |
+
const chunkId = btn.dataset.id;
|
| 245 |
+
if (action === "select") await step({ action_type: "select_chunk", chunk_id: chunkId });
|
| 246 |
+
if (action === "deselect") await step({ action_type: "deselect_chunk", chunk_id: chunkId });
|
| 247 |
+
if (action === "compress") await step({ action_type: "compress_chunk", chunk_id: chunkId, compression_ratio: 0.5 });
|
| 248 |
+
});
|
| 249 |
+
});
|
| 250 |
+
}
|
| 251 |
+
|
| 252 |
+
async function refreshState() {
|
| 253 |
+
const state = await fetchJson("/state", { method: "GET" });
|
| 254 |
+
statePanel.textContent = JSON.stringify(state, null, 2);
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
async function resetTask() {
|
| 258 |
+
try {
|
| 259 |
+
const body = await fetchJson("/reset", {
|
| 260 |
+
method: "POST",
|
| 261 |
+
body: JSON.stringify({
|
| 262 |
+
task_name: selectedTask,
|
| 263 |
+
custom_query: customQueryInput.value,
|
| 264 |
+
token_budget: tokenBudgetInput.value ? Number(tokenBudgetInput.value) : null,
|
| 265 |
+
max_steps: maxStepsInput.value ? Number(maxStepsInput.value) : null
|
| 266 |
+
}),
|
| 267 |
+
});
|
| 268 |
+
renderObservation(body.observation);
|
| 269 |
+
await refreshState();
|
| 270 |
+
} catch (error) {
|
| 271 |
+
feedbackBox.textContent = error.message;
|
| 272 |
+
}
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
async function step(payload) {
|
| 276 |
+
try {
|
| 277 |
+
const body = await fetchJson("/step", { method: "POST", body: JSON.stringify(payload) });
|
| 278 |
+
renderObservation(body.observation);
|
| 279 |
+
if (body.info && body.info.grader_breakdown) {
|
| 280 |
+
feedbackBox.textContent = `Final score: ${Number(body.reward).toFixed(4)} | ${JSON.stringify(body.info.grader_breakdown)}`;
|
| 281 |
+
}
|
| 282 |
+
await refreshState();
|
| 283 |
+
} catch (error) {
|
| 284 |
+
feedbackBox.textContent = error.message;
|
| 285 |
+
}
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
async function optimizeStep() {
|
| 289 |
+
try {
|
| 290 |
+
const suggestion = await fetchJson("/optimize-step", { method: "POST" });
|
| 291 |
+
feedbackBox.textContent = "Optimizer suggestion: " + JSON.stringify(suggestion);
|
| 292 |
+
await step(suggestion);
|
| 293 |
+
} catch (error) {
|
| 294 |
+
feedbackBox.textContent = error.message;
|
| 295 |
+
}
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
async function autoRun() {
|
| 299 |
+
for (let i = 0; i < 12; i += 1) {
|
| 300 |
+
if (!currentObservation) break;
|
| 301 |
+
const suggestion = await fetchJson("/optimize-step", { method: "POST" });
|
| 302 |
+
await step(suggestion);
|
| 303 |
+
if (suggestion.action_type === "submit_answer") break;
|
| 304 |
+
}
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
document.getElementById("resetBtn").addEventListener("click", resetTask);
|
| 308 |
+
document.getElementById("autoStepBtn").addEventListener("click", optimizeStep);
|
| 309 |
+
document.getElementById("autoRunBtn").addEventListener("click", autoRun);
|
| 310 |
+
document.getElementById("refreshBtn").addEventListener("click", refreshState);
|
| 311 |
+
document.getElementById("submitBtn").addEventListener("click", async () => {
|
| 312 |
+
await step({ action_type: "submit_answer", answer: answerInput.value || "A concise answer synthesized from the selected evidence." });
|
| 313 |
+
});
|
| 314 |
+
customQueryInput.addEventListener("input", () => { customQueryInput.dataset.userEdited = "true"; });
|
| 315 |
+
|
| 316 |
+
(async function init() {
|
| 317 |
+
allTasks = await fetchJson("/tasks", { method: "GET" });
|
| 318 |
+
renderTasks(allTasks);
|
| 319 |
+
syncInputsFromTask();
|
| 320 |
+
await resetTask();
|
| 321 |
+
})();
|
| 322 |
+
</script>
|
| 323 |
+
</body>
|
| 324 |
+
</html>
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
UI_HTML_V2 = """
|
| 329 |
+
<!doctype html>
|
| 330 |
+
<html lang="en">
|
| 331 |
<head>
|
| 332 |
<meta charset="utf-8" />
|
| 333 |
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
| 334 |
<title>rag-context-optimizer</title>
|
| 335 |
<style>
|
| 336 |
:root {
|
| 337 |
+
--bg: #f3f7fb;
|
| 338 |
--panel: #ffffff;
|
| 339 |
+
--ink: #10243c;
|
| 340 |
+
--muted: #61748a;
|
| 341 |
+
--line: #d8e2ef;
|
| 342 |
--accent: #0f766e;
|
| 343 |
+
--accent-alt: #0369a1;
|
| 344 |
--warn: #b45309;
|
| 345 |
+
--selected: #ecfeff;
|
| 346 |
}
|
| 347 |
* { box-sizing: border-box; }
|
| 348 |
body {
|
| 349 |
margin: 0;
|
| 350 |
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
| 351 |
+
background: radial-gradient(circle at top left, #ffffff 0%, var(--bg) 65%);
|
| 352 |
color: var(--ink);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
}
|
| 354 |
+
.wrap { max-width: 1360px; margin: 0 auto; padding: 24px; }
|
| 355 |
+
.grid { display: grid; grid-template-columns: 320px 1fr; gap: 16px; }
|
| 356 |
+
.stack { display: grid; gap: 16px; }
|
| 357 |
.card {
|
| 358 |
background: var(--panel);
|
| 359 |
border: 1px solid var(--line);
|
| 360 |
+
border-radius: 22px;
|
| 361 |
padding: 18px;
|
| 362 |
+
box-shadow: 0 10px 24px rgba(15, 23, 42, 0.05);
|
| 363 |
}
|
| 364 |
h1, h2, h3, p { margin-top: 0; }
|
| 365 |
+
.muted { color: var(--muted); line-height: 1.5; }
|
| 366 |
+
.stats { display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-top: 16px; }
|
| 367 |
+
.stat { border: 1px solid var(--line); border-radius: 16px; padding: 12px; background: #fbfdff; }
|
| 368 |
+
.stat .label { font-size: 0.8rem; color: var(--muted); }
|
| 369 |
+
.stat .value { margin-top: 6px; font-size: 1.35rem; font-weight: 700; }
|
| 370 |
+
.task-list, .chunk-grid { display: grid; gap: 12px; }
|
| 371 |
+
.chunk-grid { grid-template-columns: repeat(auto-fill, minmax(255px, 1fr)); }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
.task-btn {
|
| 373 |
width: 100%;
|
| 374 |
text-align: left;
|
| 375 |
border: 1px solid var(--line);
|
| 376 |
+
border-radius: 16px;
|
| 377 |
+
padding: 12px;
|
| 378 |
background: #fff;
|
|
|
|
|
|
|
| 379 |
cursor: pointer;
|
| 380 |
+
font: inherit;
|
| 381 |
}
|
| 382 |
.task-btn:hover, .task-btn.active {
|
| 383 |
+
border-color: var(--accent-alt);
|
| 384 |
+
box-shadow: 0 8px 18px rgba(3, 105, 161, 0.10);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
}
|
| 386 |
+
.toolbar, .row { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
|
| 387 |
+
button, input, textarea { border-radius: 12px; font: inherit; }
|
| 388 |
+
button {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
cursor: pointer;
|
| 390 |
+
padding: 10px 14px;
|
| 391 |
+
border: 1px solid transparent;
|
| 392 |
font-weight: 700;
|
| 393 |
}
|
| 394 |
+
.primary { background: linear-gradient(135deg, var(--accent), var(--accent-alt)); color: #fff; }
|
| 395 |
+
.secondary { background: #fff; border-color: var(--line); color: var(--ink); }
|
| 396 |
+
input, textarea { width: 100%; border: 1px solid var(--line); padding: 12px; background: #fff; }
|
| 397 |
+
textarea { resize: vertical; min-height: 110px; }
|
| 398 |
+
.query-box {
|
|
|
|
|
|
|
|
|
|
| 399 |
padding: 14px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
border: 1px solid var(--line);
|
| 401 |
border-radius: 16px;
|
| 402 |
+
background: #f8fafc;
|
| 403 |
+
font-weight: 600;
|
| 404 |
+
min-height: 60px;
|
|
|
|
|
|
|
|
|
|
| 405 |
}
|
| 406 |
+
.feedback { min-height: 24px; color: var(--warn); font-weight: 600; }
|
| 407 |
+
.chunk-card { border: 1px solid var(--line); border-radius: 18px; padding: 12px; background: #fff; }
|
| 408 |
+
.chunk-card.selected { border-color: var(--accent); background: var(--selected); }
|
| 409 |
.pill {
|
| 410 |
display: inline-block;
|
| 411 |
+
margin: 0 6px 6px 0;
|
| 412 |
+
padding: 4px 8px;
|
| 413 |
border-radius: 999px;
|
| 414 |
+
background: #eff6ff;
|
|
|
|
| 415 |
color: #1d4ed8;
|
| 416 |
font-size: 0.75rem;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
}
|
| 418 |
+
.json-panels { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-top: 16px; }
|
| 419 |
pre {
|
| 420 |
margin: 0;
|
| 421 |
white-space: pre-wrap;
|
| 422 |
word-break: break-word;
|
| 423 |
+
font-size: 0.86rem;
|
| 424 |
+
line-height: 1.45;
|
| 425 |
background: #0f172a;
|
| 426 |
color: #e2e8f0;
|
| 427 |
padding: 14px;
|
| 428 |
+
border-radius: 14px;
|
| 429 |
+
overflow: auto;
|
| 430 |
+
max-height: 420px;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
}
|
| 432 |
+
.helper-list { padding-left: 18px; margin: 0; color: var(--muted); }
|
| 433 |
+
.helper-list li + li { margin-top: 8px; }
|
| 434 |
+
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; }
|
| 435 |
+
@media (max-width: 980px) { .grid, .json-panels, .stats { grid-template-columns: 1fr; } }
|
| 436 |
</style>
|
| 437 |
</head>
|
| 438 |
<body>
|
| 439 |
+
<div class="wrap">
|
| 440 |
+
<div class="card" style="margin-bottom:16px;">
|
| 441 |
+
<h1>RAG Context Optimizer</h1>
|
| 442 |
+
<p class="muted">
|
| 443 |
+
This UI works with any prompt you provide. Pick a preset only to choose a corpus shape and default budget, then replace the query with your own prompt and let the optimizer choose the highest-value evidence for the fewest tokens possible.
|
| 444 |
+
</p>
|
| 445 |
+
<div class="stats">
|
| 446 |
+
<div class="stat"><div class="label">Task Preset</div><div class="value" id="taskStat">-</div></div>
|
| 447 |
+
<div class="stat"><div class="label">Token Budget</div><div class="value" id="budgetStat">-</div></div>
|
| 448 |
+
<div class="stat"><div class="label">Tokens Used</div><div class="value" id="usedStat">-</div></div>
|
| 449 |
+
<div class="stat"><div class="label">Step</div><div class="value" id="stepStat">-</div></div>
|
|
|
|
|
|
|
|
|
|
| 450 |
</div>
|
| 451 |
+
</div>
|
| 452 |
+
|
| 453 |
+
<div class="grid">
|
| 454 |
+
<aside class="stack">
|
| 455 |
+
<div class="card">
|
| 456 |
+
<h3>Task Presets</h3>
|
| 457 |
+
<p class="muted">These only set the default corpus flavor, budget, and step limit. Your prompt can be anything.</p>
|
| 458 |
+
<div id="taskList" class="task-list"></div>
|
| 459 |
+
</div>
|
| 460 |
+
<div class="card">
|
| 461 |
+
<h3>How It Optimizes</h3>
|
| 462 |
+
<ul class="helper-list">
|
| 463 |
+
<li>Ranks chunks by relevance per token, not just raw overlap.</li>
|
| 464 |
+
<li>Compresses heavier evidence before wasting budget.</li>
|
| 465 |
+
<li>Stops early once it has enough support to answer.</li>
|
| 466 |
+
<li>Keeps the OpenEnv endpoints live at <span class="mono">/reset</span>, <span class="mono">/step</span>, and <span class="mono">/state</span>.</li>
|
| 467 |
+
</ul>
|
| 468 |
+
<div class="toolbar" style="margin-top:16px;">
|
| 469 |
+
<a href="/docs" target="_blank"><button class="secondary" type="button">API Docs</button></a>
|
| 470 |
+
<a href="/health" target="_blank"><button class="secondary" type="button">Health</button></a>
|
| 471 |
+
</div>
|
| 472 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
</aside>
|
| 474 |
|
| 475 |
+
<section class="stack">
|
| 476 |
+
<div class="card">
|
| 477 |
+
<div class="toolbar" style="margin-bottom:12px;">
|
| 478 |
+
<button id="resetBtn" class="primary" type="button">Start / Reset</button>
|
| 479 |
+
<button id="autoStepBtn" class="secondary" type="button">Auto Optimize Step</button>
|
| 480 |
+
<button id="autoRunBtn" class="secondary" type="button">Auto Run</button>
|
| 481 |
+
<button id="refreshBtn" class="secondary" type="button">Refresh State</button>
|
| 482 |
+
</div>
|
| 483 |
+
<div class="row">
|
| 484 |
+
<input id="tokenBudgetInput" type="number" min="50" step="10" placeholder="Token budget" style="max-width:180px;" />
|
| 485 |
+
<input id="maxStepsInput" type="number" min="1" step="1" placeholder="Max steps" style="max-width:180px;" />
|
| 486 |
+
</div>
|
| 487 |
+
<div style="margin-top:12px;">
|
| 488 |
+
<label for="customQueryInput"><strong>Custom Prompt</strong></label>
|
| 489 |
+
<textarea id="customQueryInput" placeholder="Enter any prompt you want to optimize for minimal token usage."></textarea>
|
| 490 |
+
</div>
|
| 491 |
+
<div style="margin-top:12px;">
|
| 492 |
+
<label for="answerInput"><strong>Manual Final Answer</strong></label>
|
| 493 |
+
<textarea id="answerInput" placeholder="Optional. If left blank, auto-optimize will still submit an answer."></textarea>
|
| 494 |
+
</div>
|
| 495 |
+
<div class="toolbar" style="margin-top:12px;">
|
| 496 |
+
<button id="submitBtn" class="primary" type="button">Submit Manual Answer</button>
|
| 497 |
+
</div>
|
| 498 |
</div>
|
|
|
|
|
|
|
| 499 |
|
| 500 |
+
<div class="card">
|
| 501 |
+
<h3>Active Query</h3>
|
| 502 |
+
<div id="queryBox" class="query-box">No episode has started yet. Enter your prompt, then press Start / Reset.</div>
|
| 503 |
+
<div id="feedbackBox" class="feedback" style="margin-top:12px;"></div>
|
| 504 |
+
</div>
|
| 505 |
|
| 506 |
+
<div class="card">
|
| 507 |
+
<div class="toolbar" style="justify-content:space-between;">
|
| 508 |
+
<h3 style="margin-bottom:0;">Available Chunks</h3>
|
| 509 |
+
<div class="muted">Select manually or let Auto Optimize choose the best evidence per token.</div>
|
| 510 |
+
</div>
|
| 511 |
+
<div id="chunkGrid" class="chunk-grid" style="margin-top:12px;"></div>
|
| 512 |
</div>
|
| 513 |
|
| 514 |
+
<div class="json-panels">
|
| 515 |
+
<div class="card">
|
| 516 |
<h3>Observation</h3>
|
| 517 |
<pre id="observationPanel">{}</pre>
|
| 518 |
</div>
|
| 519 |
+
<div class="card">
|
| 520 |
<h3>State</h3>
|
| 521 |
<pre id="statePanel">{}</pre>
|
| 522 |
</div>
|
| 523 |
</div>
|
| 524 |
+
</section>
|
| 525 |
+
</div>
|
| 526 |
</div>
|
|
|
|
| 527 |
<script>
|
| 528 |
const taskList = document.getElementById("taskList");
|
| 529 |
const chunkGrid = document.getElementById("chunkGrid");
|
|
|
|
| 534 |
const budgetStat = document.getElementById("budgetStat");
|
| 535 |
const usedStat = document.getElementById("usedStat");
|
| 536 |
const stepStat = document.getElementById("stepStat");
|
| 537 |
+
const taskStat = document.getElementById("taskStat");
|
| 538 |
const answerInput = document.getElementById("answerInput");
|
| 539 |
+
const customQueryInput = document.getElementById("customQueryInput");
|
| 540 |
+
const tokenBudgetInput = document.getElementById("tokenBudgetInput");
|
| 541 |
+
const maxStepsInput = document.getElementById("maxStepsInput");
|
| 542 |
+
|
| 543 |
let selectedTask = "single_domain_qa";
|
| 544 |
let currentObservation = null;
|
| 545 |
+
let currentDone = false;
|
| 546 |
+
let allTasks = [];
|
| 547 |
|
| 548 |
// Perform a JSON request and return the parsed body.
// Throws an Error with a readable message when the HTTP status is not OK or
// when the response body is not valid JSON.
async function fetchJson(url, options = {}) {
  const response = await fetch(url, {
    headers: { "Content-Type": "application/json" },
    ...options
  });

  // Read the body as text first so a non-JSON error page (for example a proxy
  // 502) does not surface as an opaque SyntaxError that hides the real status.
  const raw = await response.text();
  let body;
  let parsed = false;
  try {
    body = JSON.parse(raw);
    parsed = true;
  } catch (parseError) {
    body = undefined;
  }

  if (!response.ok) {
    let detail = raw;
    if (parsed) {
      detail = body && body.detail ? body.detail : body;
    }
    // A structured detail (such as a list of validation errors) must be
    // stringified, otherwise the message would read "[object Object]".
    const message = typeof detail === "string" ? detail : JSON.stringify(detail);
    throw new Error(message || `Request failed with status ${response.status}`);
  }
  if (!parsed) {
    throw new Error(`Expected a JSON response from ${url}.`);
  }
  return body;
}
|
| 557 |
|
| 558 |
+
// Look up the preset whose name equals the currently selected task.
// Returns undefined when the presets are not loaded or nothing matches.
function getSelectedTaskMeta() {
  const matchesSelection = (candidate) => candidate.name === selectedTask;
  return allTasks.find(matchesSelection);
}
|
| 561 |
+
|
| 562 |
+
// Copy the selected preset's default budget and step limit into the inputs.
// Does nothing when no preset matches the current selection.
function syncInputsFromTask() {
  const preset = getSelectedTaskMeta();
  if (preset) {
    tokenBudgetInput.value = preset.token_budget;
    maxStepsInput.value = preset.max_steps;
  }
}
|
| 568 |
+
|
| 569 |
// Rebuild the preset list, marking the currently selected task as active.
// Labels are written with textContent so preset fields containing markup
// characters are displayed literally instead of being parsed as HTML.
function renderTasks(tasks) {
  taskList.innerHTML = "";
  tasks.forEach((task) => {
    const btn = document.createElement("button");
    btn.type = "button";
    btn.className = "task-btn" + (task.name === selectedTask ? " active" : "");

    const title = document.createElement("div");
    const titleText = document.createElement("strong");
    titleText.textContent = task.name;
    title.appendChild(titleText);

    const meta = document.createElement("div");
    meta.className = "muted";
    meta.textContent = `${task.difficulty} | budget ${task.token_budget} | max steps ${task.max_steps}`;

    const description = document.createElement("div");
    description.className = "muted";
    description.textContent = task.description;

    btn.appendChild(title);
    btn.appendChild(meta);
    btn.appendChild(description);

    btn.addEventListener("click", () => {
      selectedTask = task.name;
      // Re-render so the active highlight follows the new selection.
      renderTasks(tasks);
      syncInputsFromTask();
      taskStat.textContent = task.name;
      feedbackBox.textContent = "Preset changed. Add your prompt and press Start / Reset.";
    });
    taskList.appendChild(btn);
  });
}
|
| 590 |
|
| 591 |
// Render an observation: header stats, the raw JSON panel and one card per
// available chunk with Select/Deselect and Compress buttons.
// Chunk fields are written with textContent / dataset rather than interpolated
// into innerHTML, so corpus text containing markup or quote characters cannot
// break the card layout or inject HTML.
function renderObservation(observation) {
  currentObservation = observation;
  queryBox.textContent = observation.query || "No active query.";
  budgetStat.textContent = observation.token_budget;
  usedStat.textContent = observation.total_tokens_used;
  stepStat.textContent = observation.step_number;
  taskStat.textContent = observation.task_name;
  if (observation.last_action_feedback) feedbackBox.textContent = observation.last_action_feedback;
  observationPanel.textContent = JSON.stringify(observation, null, 2);

  const selected = new Set(observation.selected_chunks || []);

  const makePill = (text) => {
    const pill = document.createElement("span");
    pill.className = "pill";
    pill.textContent = text;
    return pill;
  };

  const makeActionButton = (label, actionType, chunkId) => {
    const button = document.createElement("button");
    button.className = "secondary";
    button.type = "button";
    button.dataset.action = actionType;
    button.dataset.id = chunkId;
    button.textContent = label;
    button.addEventListener("click", async () => {
      // Read the id back from the dataset so the payload carries the same
      // string value the attribute-based implementation sent.
      const payload = { action_type: button.dataset.action, chunk_id: button.dataset.id };
      if (button.dataset.action === "compress_chunk") payload.compression_ratio = 0.5;
      await step(payload);
    });
    return button;
  };

  chunkGrid.innerHTML = "";
  // Tolerate an observation without available_chunks instead of throwing.
  (observation.available_chunks || []).forEach((chunk) => {
    const isSelected = selected.has(chunk.chunk_id);

    const card = document.createElement("div");
    card.className = "chunk-card" + (isSelected ? " selected" : "");

    const heading = document.createElement("h4");
    heading.textContent = chunk.chunk_id;

    const meta = document.createElement("div");
    meta.style.marginBottom = "8px";
    meta.appendChild(makePill(chunk.domain));
    meta.appendChild(makePill(`${chunk.tokens} tokens`));

    const keywords = document.createElement("div");
    (chunk.keywords || []).forEach((keyword) => keywords.appendChild(makePill(keyword)));

    const actions = document.createElement("div");
    actions.className = "toolbar";
    actions.style.marginTop = "12px";
    actions.appendChild(
      makeActionButton(isSelected ? "Deselect" : "Select", isSelected ? "deselect_chunk" : "select_chunk", chunk.chunk_id)
    );
    actions.appendChild(makeActionButton("Compress 50%", "compress_chunk", chunk.chunk_id));

    card.appendChild(heading);
    card.appendChild(meta);
    card.appendChild(keywords);
    card.appendChild(actions);
    chunkGrid.appendChild(card);
  });
}
|
| 636 |
|
| 637 |
// Fetch /state and show it in the state panel; on failure show the error
// message in the same panel as a JSON object.
async function refreshState() {
  let snapshot;
  try {
    snapshot = await fetchJson("/state", { method: "GET" });
  } catch (error) {
    snapshot = { error: error.message };
  }
  statePanel.textContent = JSON.stringify(snapshot, null, 2);
}
|
| 645 |
|
| 646 |
// Start a new episode from the form values. Requires a non-blank custom
// prompt; blank budget / step inputs are sent as null.
async function resetTask() {
  const customQuery = customQueryInput.value.trim();
  if (customQuery === "") {
    feedbackBox.textContent = "Please enter your own prompt first.";
    return;
  }

  const optionalNumber = (input) => (input.value ? Number(input.value) : null);
  const resetRequest = {
    task_name: selectedTask,
    custom_query: customQuery,
    token_budget: optionalNumber(tokenBudgetInput),
    max_steps: optionalNumber(maxStepsInput)
  };

  try {
    const body = await fetchJson("/reset", { method: "POST", body: JSON.stringify(resetRequest) });
    currentDone = false;
    renderObservation(body.observation);
    feedbackBox.textContent = "Episode started with your custom prompt.";
    await refreshState();
  } catch (error) {
    feedbackBox.textContent = error.message;
  }
}
|
| 670 |
|
| 671 |
// Send one action to /step and re-render. Resolves to the response body, or
// to null when no episode is active or the request failed.
async function step(payload) {
  if (!currentObservation) {
    feedbackBox.textContent = "Start an episode first.";
    return null;
  }

  let outcome = null;
  try {
    const response = await fetchJson("/step", { method: "POST", body: JSON.stringify(payload) });
    renderObservation(response.observation);
    currentDone = Boolean(response.done);

    const breakdown = response.info && response.info.grader_breakdown;
    if (breakdown) {
      const score = Number(response.reward || 0).toFixed(4);
      feedbackBox.textContent = `Final score ${score} | ${JSON.stringify(breakdown)}`;
    }
    await refreshState();
    outcome = response;
  } catch (error) {
    feedbackBox.textContent = error.message;
  }
  return outcome;
}
|
| 693 |
+
|
| 694 |
+
// Ask the server for one suggested action, show it, then apply it.
async function optimizeStep() {
  if (currentObservation) {
    try {
      const suggestion = await fetchJson("/optimize-step", { method: "POST" });
      feedbackBox.textContent = "Optimizer chose: " + JSON.stringify(suggestion);
      await step(suggestion);
    } catch (error) {
      feedbackBox.textContent = error.message;
    }
  } else {
    feedbackBox.textContent = "Start an episode first.";
  }
}
|
| 707 |
|
| 708 |
+
// Repeatedly fetch and apply optimizer suggestions (at most 20 iterations)
// until the episode is done, a step fails, or an answer is submitted.
async function autoRun() {
  if (!currentObservation) {
    feedbackBox.textContent = "Start an episode first.";
    return;
  }
  // This runs as a click handler, so a rejected /optimize-step request would
  // otherwise be an unhandled promise rejection with no visible feedback.
  try {
    for (let index = 0; index < 20; index += 1) {
      if (currentDone) break;
      const suggestion = await fetchJson("/optimize-step", { method: "POST" });
      const result = await step(suggestion);
      if (!result || result.done || suggestion.action_type === "submit_answer") break;
    }
  } catch (error) {
    feedbackBox.textContent = error.message;
  }
}
|
| 720 |
+
|
| 721 |
// Wire the toolbar buttons to their handlers.
document.getElementById("resetBtn").addEventListener("click", resetTask);
document.getElementById("autoStepBtn").addEventListener("click", optimizeStep);
document.getElementById("autoRunBtn").addEventListener("click", autoRun);
document.getElementById("refreshBtn").addEventListener("click", refreshState);
document.getElementById("submitBtn").addEventListener("click", async () => {
  // Use a generic placeholder answer when the manual answer box is blank.
  const manualAnswer = answerInput.value.trim() || "Concise answer synthesized from the selected evidence.";
  await step({ action_type: "submit_answer", answer: manualAnswer });
});
|
| 729 |
|
| 730 |
// Page bootstrap: load the presets, fill the default inputs and show the
// "no episode yet" placeholders. No episode is started automatically.
(async function init() {
  const placeholder = JSON.stringify({ message: "No active episode yet." }, null, 2);
  observationPanel.textContent = placeholder;
  statePanel.textContent = placeholder;
  taskStat.textContent = selectedTask;

  // Nothing awaits this IIFE, so a failed /tasks request must be reported
  // here; otherwise it is an unhandled rejection and the preset list stays
  // silently empty.
  try {
    allTasks = await fetchJson("/tasks", { method: "GET" });
  } catch (error) {
    feedbackBox.textContent = "Could not load task presets: " + error.message;
    return;
  }

  renderTasks(allTasks);
  syncInputsFromTask();
  feedbackBox.textContent = "Add any prompt you want, then press Start / Reset.";
})();
|
| 739 |
</script>
|
| 740 |
</body>
|
|
|
|
| 752 |
|
| 753 |
@app.get("/", response_class=HTMLResponse)
async def home_page():
    """Serve the single-page UI stored in ``UI_HTML_V2`` at the site root."""
    return HTMLResponse(UI_HTML_V2)
|
| 756 |
|
| 757 |
|
| 758 |
def _serialize_observation(observation: Any) -> dict[str, Any]:
|
|
|
|
| 791 |
}
|
| 792 |
|
| 793 |
|
| 794 |
+
def _tokenize(text: str) -> set[str]:
    """Return the set of lowercase alphanumeric terms found in *text*.

    Runs of ASCII letters and digits are terms; everything else separates
    them. A missing or empty *text* yields an empty set rather than raising,
    so a blank query cannot crash the optimizer.
    """
    import re

    return set(re.findall(r"[a-z0-9]+", (text or "").lower()))
|
| 798 |
+
|
| 799 |
+
|
| 800 |
+
def _suggest_action(env: RagContextOptimizerEnv) -> dict[str, Any]:
    """Suggest the next action for the environment's current observation.

    The rules are tried in order and the first that applies wins:

    1. Compress the heaviest selected chunk (ratio 0.5) when at least 65% of
       the budget is used or the step number is 3 or more, and that chunk is
       larger than ``max(120, token_budget // 4)`` tokens.
    2. Submit an answer when two or more chunks are selected, or the step
       number has reached ``max(2, max_steps - 2)``.
    3. Select the unselected chunk with the best keyword-overlap-per-token
       score that still fits in the remaining budget.
    4. Otherwise submit: a generic answer if anything is selected, a
       "budget too small" message if chunks exist but none fit, or a
       "no usable evidence" message if there are no chunks at all.

    Returns:
        An action payload containing ``action_type`` plus the fields that
        action needs (``chunk_id``, ``compression_ratio`` or ``answer``).
    """
    fallback_answer = "Optimized answer based on the currently selected evidence."

    observation = env._build_observation()
    query_terms = _tokenize(observation.query)
    selected = set(observation.selected_chunks)
    remaining_budget = observation.token_budget - observation.total_tokens_used

    def score_chunk(chunk: Any) -> tuple[float, int, str]:
        """Sort key: best relevance-per-token first, then fewer tokens, then id."""
        keyword_terms = _tokenize(" ".join(chunk.keywords))
        overlap = len(query_terms & keyword_terms)
        union = len(query_terms | keyword_terms) or 1
        relevance = overlap / union  # Jaccard similarity of query and keywords
        ratio = relevance / max(chunk.tokens, 1)
        return (-ratio, chunk.tokens, chunk.chunk_id)

    selected_chunks = [chunk for chunk in observation.available_chunks if chunk.chunk_id in selected]

    # Rule 1: compress the heaviest selected chunk once past the early phase.
    past_early_phase = (
        observation.total_tokens_used >= int(observation.token_budget * 0.65)
        or observation.step_number >= 3
    )
    if selected_chunks and past_early_phase:
        # Most tokens wins; ties go to the smallest chunk_id. This is the
        # element a full sort on the same key would put first.
        heaviest = min(selected_chunks, key=lambda chunk: (-chunk.tokens, chunk.chunk_id))
        if heaviest.tokens > max(120, observation.token_budget // 4):
            return {
                "action_type": "compress_chunk",
                "chunk_id": heaviest.chunk_id,
                "compression_ratio": 0.5,
            }

    # Rule 2: enough evidence gathered, or the step limit is close.
    if len(selected) >= 2 or observation.step_number >= max(2, env.task.max_steps - 2):
        chosen_keywords: list[str] = []
        for chunk in selected_chunks[:3]:
            chosen_keywords.extend(chunk.keywords[:2])
        if chosen_keywords:
            answer = "Optimized answer based on selected evidence: " + ", ".join(chosen_keywords[:6])
        else:
            answer = fallback_answer
        return {"action_type": "submit_answer", "answer": answer}

    # Rule 3: pick the best-scoring unselected chunk that fits the budget.
    available = [chunk for chunk in observation.available_chunks if chunk.chunk_id not in selected]
    for chunk in sorted(available, key=score_chunk):
        if chunk.tokens <= remaining_budget:
            return {"action_type": "select_chunk", "chunk_id": chunk.chunk_id}

    # Rule 4: nothing more can be selected, so finish the episode.
    if selected_chunks:
        return {"action_type": "submit_answer", "answer": fallback_answer}
    if available:
        smallest_chunk = min(available, key=lambda chunk: (chunk.tokens, chunk.chunk_id))
        return {
            "action_type": "submit_answer",
            "answer": (
                "No chunk fits within the current token budget. "
                f"Increase the budget to at least {smallest_chunk.tokens} tokens or choose a broader budget."
            ),
        }
    return {"action_type": "submit_answer", "answer": "No usable evidence was available."}
|
| 859 |
+
|
| 860 |
+
|
| 861 |
@app.post("/reset")
async def reset_endpoint(payload: ResetRequest):
    """Create a fresh environment for the requested task and reset it.

    The optional ``custom_query``, ``token_budget`` and ``max_steps`` fields
    of the request are passed to the environment as overrides.

    Raises:
        HTTPException: 400 when ``task_name`` is not a known task.
    """
    if payload.task_name not in TASKS_BY_NAME:
        raise HTTPException(status_code=400, detail="Unknown task_name.")
    env = RagContextOptimizerEnv(
        task_name=payload.task_name,
        query_override=payload.custom_query,
        token_budget_override=payload.token_budget,
        max_steps_override=payload.max_steps,
    )
    result = await env.reset()
    # Publish the environment only after reset() has succeeded. A failed reset
    # then leaves the previous episode in place, and other requests never see
    # an environment that has not been reset yet.
    app.state.env = env
    return _serialize_step_result(result, reset=True)
|
|
|
|
| 907 |
"description": task.description,
|
| 908 |
"difficulty": task.difficulty,
|
| 909 |
"token_budget": task.token_budget,
|
| 910 |
+
"query": task.query,
|
| 911 |
+
"max_steps": task.max_steps,
|
| 912 |
}
|
| 913 |
for task in ALL_TASKS
|
| 914 |
]
|
| 915 |
|
| 916 |
|
| 917 |
+
@app.post("/optimize-step")
async def optimize_step_endpoint():
    """Return the optimizer's suggested next action for the active episode.

    Raises:
        HTTPException: 400 when no environment has been created via /reset.
    """
    active_env = getattr(app.state, "env", None)
    if active_env is not None:
        return _suggest_action(active_env)
    raise HTTPException(status_code=400, detail="Environment is not initialized. Call /reset first.")
|
| 923 |
+
|
| 924 |
+
|
| 925 |
if __name__ == "__main__":
|
| 926 |
import uvicorn
|
| 927 |
|
env/__pycache__/environment.cpython-314.pyc
CHANGED
|
Binary files a/env/__pycache__/environment.cpython-314.pyc and b/env/__pycache__/environment.cpython-314.pyc differ
|
|
|
env/environment.py
CHANGED
|
@@ -4,7 +4,7 @@ Main OpenEnv-style environment for rag-context-optimizer.
|
|
| 4 |
|
| 5 |
from __future__ import annotations
|
| 6 |
|
| 7 |
-
from dataclasses import asdict, dataclass, is_dataclass
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import Any
|
| 10 |
|
|
@@ -24,7 +24,13 @@ class StepResult:
|
|
| 24 |
|
| 25 |
|
| 26 |
class RagContextOptimizerEnv:
|
| 27 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
if task_name not in TASKS_BY_NAME:
|
| 29 |
raise ValueError(f"Unknown task_name: {task_name}")
|
| 30 |
|
|
@@ -32,7 +38,12 @@ class RagContextOptimizerEnv:
|
|
| 32 |
self._all_chunks = load_corpus(self._corpus_path)
|
| 33 |
self.retriever = HybridRetriever(self._all_chunks)
|
| 34 |
self.grader = TaskGrader()
|
| 35 |
-
self.task: Task =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
self._available_chunks: list[Chunk] = []
|
| 38 |
self._selected_chunks: list[str] = []
|
|
@@ -42,6 +53,22 @@ class RagContextOptimizerEnv:
|
|
| 42 |
self._last_action_feedback: str | None = None
|
| 43 |
self._last_answer = ""
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
async def reset(self) -> StepResult:
|
| 46 |
self._available_chunks = self._filter_chunks_for_task(self.task)
|
| 47 |
self._selected_chunks = []
|
|
|
|
| 4 |
|
| 5 |
from __future__ import annotations
|
| 6 |
|
| 7 |
+
from dataclasses import asdict, dataclass, is_dataclass, replace
|
| 8 |
from pathlib import Path
|
| 9 |
from typing import Any
|
| 10 |
|
|
|
|
| 24 |
|
| 25 |
|
| 26 |
class RagContextOptimizerEnv:
|
| 27 |
+
def __init__(
|
| 28 |
+
self,
|
| 29 |
+
task_name: str = "single_domain_qa",
|
| 30 |
+
query_override: str | None = None,
|
| 31 |
+
token_budget_override: int | None = None,
|
| 32 |
+
max_steps_override: int | None = None,
|
| 33 |
+
):
|
| 34 |
if task_name not in TASKS_BY_NAME:
|
| 35 |
raise ValueError(f"Unknown task_name: {task_name}")
|
| 36 |
|
|
|
|
| 38 |
self._all_chunks = load_corpus(self._corpus_path)
|
| 39 |
self.retriever = HybridRetriever(self._all_chunks)
|
| 40 |
self.grader = TaskGrader()
|
| 41 |
+
self.task: Task = self._build_task(
|
| 42 |
+
TASKS_BY_NAME[task_name],
|
| 43 |
+
query_override=query_override,
|
| 44 |
+
token_budget_override=token_budget_override,
|
| 45 |
+
max_steps_override=max_steps_override,
|
| 46 |
+
)
|
| 47 |
|
| 48 |
self._available_chunks: list[Chunk] = []
|
| 49 |
self._selected_chunks: list[str] = []
|
|
|
|
| 53 |
self._last_action_feedback: str | None = None
|
| 54 |
self._last_answer = ""
|
| 55 |
|
| 56 |
+
@staticmethod
|
| 57 |
+
def _build_task(
|
| 58 |
+
base_task: Task,
|
| 59 |
+
query_override: str | None = None,
|
| 60 |
+
token_budget_override: int | None = None,
|
| 61 |
+
max_steps_override: int | None = None,
|
| 62 |
+
) -> Task:
|
| 63 |
+
updated_task = base_task
|
| 64 |
+
if query_override and query_override.strip():
|
| 65 |
+
updated_task = replace(updated_task, query=query_override.strip())
|
| 66 |
+
if token_budget_override is not None and token_budget_override > 0:
|
| 67 |
+
updated_task = replace(updated_task, token_budget=token_budget_override)
|
| 68 |
+
if max_steps_override is not None and max_steps_override > 0:
|
| 69 |
+
updated_task = replace(updated_task, max_steps=max_steps_override)
|
| 70 |
+
return updated_task
|
| 71 |
+
|
| 72 |
async def reset(self) -> StepResult:
|
| 73 |
self._available_chunks = self._filter_chunks_for_task(self.task)
|
| 74 |
self._selected_chunks = []
|
streamlit_app.py
CHANGED
|
@@ -6,9 +6,9 @@ import streamlit as st
|
|
| 6 |
|
| 7 |
API_URL = st.secrets.get("API_URL", "http://localhost:7860") if hasattr(st, "secrets") else "http://localhost:7860"
|
| 8 |
|
| 9 |
-
st.set_page_config(page_title="rag-context-optimizer", page_icon="
|
| 10 |
st.title("RAG Context Optimizer")
|
| 11 |
-
st.caption("
|
| 12 |
|
| 13 |
|
| 14 |
def api_get(path: str):
|
|
@@ -17,52 +17,142 @@ def api_get(path: str):
|
|
| 17 |
return response.json()
|
| 18 |
|
| 19 |
|
| 20 |
-
def api_post(path: str, payload: dict):
|
| 21 |
-
response = requests.post(f"{API_URL}{path}", json=payload, timeout=20)
|
| 22 |
response.raise_for_status()
|
| 23 |
return response.json()
|
| 24 |
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
if st.sidebar.button("Reset Task", use_container_width=True):
|
| 31 |
-
st.session_state["reset_payload"] = api_post("/reset", {"task_name": selected_task})
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
payload = st.session_state["
|
| 37 |
observation = payload["observation"]
|
| 38 |
|
| 39 |
-
col1, col2, col3 = st.columns(
|
| 40 |
-
col1.metric("
|
| 41 |
-
col2.metric("
|
| 42 |
-
col3.metric("
|
|
|
|
| 43 |
|
| 44 |
-
st.subheader("Query")
|
| 45 |
st.info(observation["query"])
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
st.rerun()
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
st.subheader("Observation")
|
| 65 |
-
st.json(
|
| 66 |
|
| 67 |
st.subheader("State")
|
| 68 |
st.json(api_get("/state"))
|
|
|
|
| 6 |
|
| 7 |
API_URL = st.secrets.get("API_URL", "http://localhost:7860") if hasattr(st, "secrets") else "http://localhost:7860"
|
| 8 |
|
| 9 |
+
st.set_page_config(page_title="rag-context-optimizer", page_icon="R", layout="wide")
|
| 10 |
st.title("RAG Context Optimizer")
|
| 11 |
+
st.caption("Use any prompt, keep the token budget tight, and let the optimizer pick the best evidence per token.")
|
| 12 |
|
| 13 |
|
| 14 |
def api_get(path: str):
|
|
|
|
| 17 |
return response.json()
|
| 18 |
|
| 19 |
|
| 20 |
+
def api_post(path: str, payload: dict | None = None):
    """POST a JSON body to the backend and return the decoded JSON reply.

    Args:
        path: Endpoint path appended to ``API_URL`` (e.g. ``"/step"``).
        payload: JSON-serialisable body; an empty object is sent when it is
            ``None`` or otherwise falsy.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status`` when the
            backend answers with an error status.
    """
    body = payload or {}
    reply = requests.post(f"{API_URL}{path}", json=body, timeout=20)
    reply.raise_for_status()
    return reply.json()
|
| 24 |
|
| 25 |
|
| 26 |
+
def start_episode(task_name: str, query: str, token_budget: int, max_steps: int):
    """Reset the backend environment and cache its reply in the session.

    Sends the task preset together with the user's prompt and limits to
    ``/reset`` and stores the response under ``st.session_state["payload"]``.

    Args:
        task_name: Name of the task preset to reset to.
        query: Prompt sent as ``custom_query``.
        token_budget: Value sent as ``token_budget``.
        max_steps: Value sent as ``max_steps``.
    """
    reset_request = {
        "task_name": task_name,
        "custom_query": query,
        "token_budget": token_budget,
        "max_steps": max_steps,
    }
    st.session_state["payload"] = api_post("/reset", reset_request)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def do_step(payload: dict):
    """Send one action to ``/step`` and cache the backend's reply.

    Args:
        payload: Action body forwarded unchanged to the backend.

    The response replaces ``st.session_state["payload"]`` so the next rerun
    renders the updated episode.
    """
    step_result = api_post("/step", payload)
    st.session_state["payload"] = step_result
|
| 40 |
|
|
|
|
|
|
|
| 41 |
|
| 42 |
+
# --- Sidebar: choose a preset, enter the prompt, tune the limits -----------
tasks = api_get("/tasks")
task_map = {entry["name"]: entry for entry in tasks}

selected_task = st.sidebar.selectbox("Task preset", list(task_map))
task_meta = task_map[selected_task]

# Pre-fill the prompt box with whatever was typed on the previous run.
custom_query = st.sidebar.text_area(
    "Custom prompt",
    value=st.session_state.get("custom_query", ""),
    height=180,
    placeholder="Enter any prompt you want to optimize for minimal token usage.",
)
# Both limits default to the values of the selected preset.
token_budget = st.sidebar.number_input(
    "Token budget",
    min_value=50,
    value=int(task_meta["token_budget"]),
    step=10,
)
max_steps = st.sidebar.number_input(
    "Max steps",
    min_value=1,
    value=int(task_meta["max_steps"]),
    step=1,
)

st.session_state["custom_query"] = custom_query

start_col, refresh_col = st.sidebar.columns(2)
if start_col.button("Start / Reset", use_container_width=True):
    prompt = custom_query.strip()
    if prompt:
        start_episode(selected_task, prompt, int(token_budget), int(max_steps))
        st.rerun()
    else:
        st.sidebar.error("Enter a custom prompt first.")

if refresh_col.button("Refresh", use_container_width=True):
    st.rerun()

# Nothing to render until an episode has been started.
if "payload" not in st.session_state:
    st.info("Add your prompt in the sidebar and press Start / Reset.")
    st.stop()
|
| 84 |
|
| 85 |
+
# --- Main panel: summary of the current episode -----------------------------
payload = st.session_state["payload"]
observation = payload["observation"]

# Headline metrics, one per column, listed in display order as
# (label, observation key).
headline_metrics = (
    ("Task", "task_name"),
    ("Budget", "token_budget"),
    ("Used", "total_tokens_used"),
    ("Step", "step_number"),
)
for metric_col, (label, field) in zip(st.columns(4), headline_metrics):
    metric_col.metric(label, observation[field])

st.subheader("Active Query")
st.info(observation["query"])

# Feedback on the previous action, shown only when the backend supplied one.
feedback = observation.get("last_action_feedback")
if feedback:
    st.warning(feedback)

# Show the score and its breakdown once the payload carries a grader breakdown.
grader_breakdown = payload.get("info", {}).get("grader_breakdown")
if grader_breakdown:
    st.success(f"Final score: {payload.get('reward', 0):.4f}")
    st.json(grader_breakdown)
|
| 103 |
+
|
| 104 |
+
# --- Actions: automatic optimisation or a manually typed answer -------------
auto_step_col, auto_run_col, answer_col = st.columns(3)

# Single step: ask the backend for its suggested action and apply it.
if auto_step_col.button("Auto Optimize Step", use_container_width=True):
    do_step(api_post("/optimize-step"))
    st.rerun()

# Auto run: keep applying suggestions until an answer is submitted, the
# episode reports done, or 20 suggestions have been applied.
if auto_run_col.button("Auto Run", use_container_width=True):
    for _ in range(20):
        suggestion = api_post("/optimize-step")
        do_step(suggestion)
        answered = suggestion["action_type"] == "submit_answer"
        if answered or st.session_state["payload"]["done"]:
            break
    st.rerun()

manual_answer = answer_col.text_input("Manual answer", value="")
if st.button("Submit Manual Answer", type="primary", use_container_width=True):
    # A blank input falls back to a generic placeholder answer.
    answer_text = manual_answer.strip() or "Concise answer synthesized from the selected evidence."
    do_step({"action_type": "submit_answer", "answer": answer_text})
    st.rerun()
|
| 126 |
+
|
| 127 |
+
# --- Chunk browser: two-column grid of cards with per-chunk actions ---------
st.subheader("Available Chunks")
chunk_columns = st.columns(2)

# Build the membership set once; it does not change while the grid is drawn.
selected_ids = set(observation["selected_chunks"])

for index, chunk in enumerate(observation["available_chunks"]):
    chunk_id = chunk["chunk_id"]

    # Alternate cards between the left and right column.
    container = chunk_columns[index % 2].container(border=True)
    container.markdown(f"**{chunk_id}**")
    container.caption(f"{chunk['domain']} | {chunk['tokens']} tokens")
    container.write(", ".join(chunk["keywords"]))

    c1, c2 = container.columns(2)

    # The first button toggles selection; its label, key and action depend
    # on whether the chunk is currently selected.
    if chunk_id in selected_ids:
        if c1.button("Deselect", key=f"deselect-{chunk_id}", use_container_width=True):
            do_step({"action_type": "deselect_chunk", "chunk_id": chunk_id})
            st.rerun()
    else:
        if c1.button("Select", key=f"select-{chunk_id}", use_container_width=True):
            do_step({"action_type": "select_chunk", "chunk_id": chunk_id})
            st.rerun()

    # Compression is offered for every chunk at a fixed ratio of 0.5.
    if c2.button("Compress 50%", key=f"compress-{chunk_id}", use_container_width=True):
        do_step(
            {
                "action_type": "compress_chunk",
                "chunk_id": chunk_id,
                "compression_ratio": 0.5,
            }
        )
        st.rerun()
|
| 153 |
+
|
| 154 |
# --- Raw views: the full cached payload and the backend's /state reply ------
st.subheader("Observation")
st.json(payload)

st.subheader("State")
backend_state = api_get("/state")
st.json(backend_state)
|