NITISHRG15102007 commited on
Commit
f8d0eb2
·
verified ·
1 Parent(s): de095eb

Fix custom prompt UI and token optimizer flow

Browse files
__pycache__/app.cpython-314.pyc CHANGED
Binary files a/__pycache__/app.cpython-314.pyc and b/__pycache__/app.cpython-314.pyc differ
 
__pycache__/streamlit_app.cpython-314.pyc CHANGED
Binary files a/__pycache__/streamlit_app.cpython-314.pyc and b/__pycache__/streamlit_app.cpython-314.pyc differ
 
app.py CHANGED
@@ -20,6 +20,9 @@ from env.tasks import ALL_TASKS, TASKS_BY_NAME
20
 
21
  class ResetRequest(BaseModel):
22
  task_name: Literal["single_domain_qa", "cross_domain_synthesis", "adversarial_compression"]
 
 
 
23
 
24
 
25
  @asynccontextmanager
@@ -49,251 +52,478 @@ app.add_middleware(
49
  UI_HTML = """
50
  <!doctype html>
51
  <html lang="en">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  <head>
53
  <meta charset="utf-8" />
54
  <meta name="viewport" content="width=device-width, initial-scale=1" />
55
  <title>rag-context-optimizer</title>
56
  <style>
57
  :root {
58
- --bg: #f4f7fb;
59
  --panel: #ffffff;
60
- --ink: #122033;
61
- --muted: #64748b;
62
- --line: #dbe4f0;
63
  --accent: #0f766e;
64
- --accent-2: #0ea5e9;
65
  --warn: #b45309;
 
66
  }
67
  * { box-sizing: border-box; }
68
  body {
69
  margin: 0;
70
  font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
 
71
  color: var(--ink);
72
- background:
73
- radial-gradient(circle at top left, #d9f99d 0, transparent 22%),
74
- radial-gradient(circle at top right, #bfdbfe 0, transparent 25%),
75
- linear-gradient(180deg, #f8fbff 0%, var(--bg) 100%);
76
- }
77
- .shell {
78
- max-width: 1240px;
79
- margin: 0 auto;
80
- padding: 28px 20px 48px;
81
- }
82
- .hero {
83
- display: grid;
84
- grid-template-columns: 1.2fr 0.8fr;
85
- gap: 18px;
86
- margin-bottom: 18px;
87
  }
 
 
 
88
  .card {
89
  background: var(--panel);
90
  border: 1px solid var(--line);
91
- border-radius: 20px;
92
  padding: 18px;
93
- box-shadow: 0 10px 30px rgba(15, 23, 42, 0.05);
94
  }
95
  h1, h2, h3, p { margin-top: 0; }
96
- h1 { font-size: 2.1rem; margin-bottom: 10px; }
97
- .subtitle { color: var(--muted); line-height: 1.5; }
98
- .stat-grid {
99
- display: grid;
100
- grid-template-columns: repeat(3, 1fr);
101
- gap: 12px;
102
- margin-top: 18px;
103
- }
104
- .stat {
105
- border: 1px solid var(--line);
106
- border-radius: 16px;
107
- padding: 12px;
108
- background: #fcfdff;
109
- }
110
- .stat .label { font-size: 0.82rem; color: var(--muted); }
111
- .stat .value { font-size: 1.4rem; font-weight: 700; margin-top: 6px; }
112
- .layout {
113
- display: grid;
114
- grid-template-columns: 320px 1fr;
115
- gap: 18px;
116
- }
117
- .task-list { display: grid; gap: 10px; }
118
  .task-btn {
119
  width: 100%;
120
  text-align: left;
121
  border: 1px solid var(--line);
 
 
122
  background: #fff;
123
- border-radius: 14px;
124
- padding: 12px 14px;
125
  cursor: pointer;
126
- transition: 0.18s ease;
127
  }
128
  .task-btn:hover, .task-btn.active {
129
- border-color: var(--accent-2);
130
- transform: translateY(-1px);
131
- box-shadow: 0 8px 18px rgba(14, 165, 233, 0.12);
132
- }
133
- .task-btn .name { font-weight: 700; }
134
- .task-btn .meta { color: var(--muted); font-size: 0.85rem; margin-top: 4px; }
135
- .toolbar {
136
- display: flex;
137
- flex-wrap: wrap;
138
- gap: 10px;
139
- margin-bottom: 14px;
140
  }
141
- button.action, select, textarea {
142
- border-radius: 12px;
143
- border: 1px solid var(--line);
144
- font: inherit;
145
- }
146
- button.action {
147
- background: linear-gradient(135deg, var(--accent), var(--accent-2));
148
- color: white;
149
- padding: 10px 14px;
150
  cursor: pointer;
151
- border: none;
 
152
  font-weight: 700;
153
  }
154
- button.action.secondary {
155
- background: white;
156
- color: var(--ink);
157
- border: 1px solid var(--line);
158
- }
159
- .query {
160
- font-size: 1.05rem;
161
- font-weight: 600;
162
  padding: 14px;
163
- border-radius: 16px;
164
- background: #f8fafc;
165
- border: 1px solid var(--line);
166
- margin-bottom: 14px;
167
- }
168
- .grid {
169
- display: grid;
170
- grid-template-columns: repeat(auto-fill, minmax(240px, 1fr));
171
- gap: 12px;
172
- }
173
- .chunk {
174
  border: 1px solid var(--line);
175
  border-radius: 16px;
176
- padding: 12px;
177
- background: #fff;
178
- }
179
- .chunk.selected {
180
- border-color: var(--accent);
181
- background: #f0fdfa;
182
  }
183
- .chunk h4 { margin: 0 0 8px; font-size: 1rem; }
 
 
184
  .pill {
185
  display: inline-block;
 
 
186
  border-radius: 999px;
187
- padding: 3px 8px;
188
- background: #eef6ff;
189
  color: #1d4ed8;
190
  font-size: 0.75rem;
191
- margin-right: 6px;
192
- margin-bottom: 6px;
193
- }
194
- .row {
195
- display: flex;
196
- gap: 8px;
197
- align-items: center;
198
- flex-wrap: wrap;
199
- margin-top: 10px;
200
- }
201
- .answer-box {
202
- width: 100%;
203
- min-height: 110px;
204
- padding: 12px;
205
- resize: vertical;
206
- }
207
- .panel-grid {
208
- display: grid;
209
- grid-template-columns: 1fr 1fr;
210
- gap: 12px;
211
- margin-top: 14px;
212
  }
 
213
  pre {
214
  margin: 0;
215
  white-space: pre-wrap;
216
  word-break: break-word;
217
- font-size: 0.88rem;
 
218
  background: #0f172a;
219
  color: #e2e8f0;
220
  padding: 14px;
221
- border-radius: 16px;
222
- min-height: 160px;
223
- }
224
- .feedback { color: var(--warn); font-weight: 600; min-height: 24px; }
225
- @media (max-width: 980px) {
226
- .hero, .layout, .panel-grid { grid-template-columns: 1fr; }
227
- .stat-grid { grid-template-columns: 1fr; }
228
  }
 
 
 
 
229
  </style>
230
  </head>
231
  <body>
232
- <div class="shell">
233
- <section class="hero">
234
- <div class="card">
235
- <h1>RAG Context Optimizer</h1>
236
- <p class="subtitle">
237
- Interactively test retrieval, compression, and answer quality tradeoffs.
238
- This UI sits on top of the benchmark API, so you can explore the environment visually
239
- without losing the validator-friendly `/reset`, `/step`, and `/state` endpoints.
240
- </p>
241
- <div class="stat-grid">
242
- <div class="stat"><div class="label">Token Budget</div><div class="value" id="budgetStat">-</div></div>
243
- <div class="stat"><div class="label">Tokens Used</div><div class="value" id="usedStat">-</div></div>
244
- <div class="stat"><div class="label">Step</div><div class="value" id="stepStat">-</div></div>
245
- </div>
246
  </div>
247
- <div class="card">
248
- <h3>Quick Start</h3>
249
- <p class="subtitle">
250
- Pick a task, press reset, select evidence chunks, optionally compress them, and submit an answer.
251
- The right-hand panels mirror the raw observation and server state so debugging stays easy.
252
- </p>
253
- <div class="row">
254
- <a href="/docs" target="_blank"><button class="action secondary" type="button">Open API Docs</button></a>
255
- <a href="/health" target="_blank"><button class="action secondary" type="button">Health JSON</button></a>
 
 
 
 
 
 
 
 
 
 
 
 
256
  </div>
257
- </div>
258
- </section>
259
-
260
- <section class="layout">
261
- <aside class="card">
262
- <h3>Tasks</h3>
263
- <div id="taskList" class="task-list"></div>
264
  </aside>
265
 
266
- <main class="card">
267
- <div class="toolbar">
268
- <button id="resetBtn" class="action" type="button">Reset Task</button>
269
- <button id="refreshBtn" class="action secondary" type="button">Refresh State</button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  </div>
271
- <div id="queryBox" class="query">Choose a task to begin.</div>
272
- <div class="feedback" id="feedbackBox"></div>
273
 
274
- <h3>Available Chunks</h3>
275
- <div id="chunkGrid" class="grid"></div>
 
 
 
276
 
277
- <h3 style="margin-top:18px;">Answer</h3>
278
- <textarea id="answerInput" class="answer-box" placeholder="Write a concise answer here before submitting..."></textarea>
279
- <div class="row">
280
- <button id="submitBtn" class="action" type="button">Submit Answer</button>
 
 
281
  </div>
282
 
283
- <div class="panel-grid">
284
- <div>
285
  <h3>Observation</h3>
286
  <pre id="observationPanel">{}</pre>
287
  </div>
288
- <div>
289
  <h3>State</h3>
290
  <pre id="statePanel">{}</pre>
291
  </div>
292
  </div>
293
- </main>
294
- </section>
295
  </div>
296
-
297
  <script>
298
  const taskList = document.getElementById("taskList");
299
  const chunkGrid = document.getElementById("chunkGrid");
@@ -304,121 +534,207 @@ UI_HTML = """
304
  const budgetStat = document.getElementById("budgetStat");
305
  const usedStat = document.getElementById("usedStat");
306
  const stepStat = document.getElementById("stepStat");
 
307
  const answerInput = document.getElementById("answerInput");
 
 
 
 
308
  let selectedTask = "single_domain_qa";
309
  let currentObservation = null;
 
 
310
 
311
  async function fetchJson(url, options = {}) {
312
  const response = await fetch(url, {
313
  headers: { "Content-Type": "application/json" },
314
- ...options,
315
  });
316
  const body = await response.json();
317
- if (!response.ok) {
318
- throw new Error(body.detail || JSON.stringify(body));
319
- }
320
  return body;
321
  }
322
 
 
 
 
 
 
 
 
 
 
 
 
323
  function renderTasks(tasks) {
324
  taskList.innerHTML = "";
325
- tasks.forEach(task => {
326
  const btn = document.createElement("button");
 
327
  btn.className = "task-btn" + (task.name === selectedTask ? " active" : "");
328
- btn.innerHTML = `<div class="name">${task.name}</div><div class="meta">${task.difficulty} · budget ${task.token_budget}</div><div class="meta">${task.description}</div>`;
329
- btn.onclick = () => {
 
 
 
 
330
  selectedTask = task.name;
331
  renderTasks(tasks);
332
- };
 
 
 
333
  taskList.appendChild(btn);
334
  });
335
  }
336
 
337
  function renderObservation(observation) {
338
  currentObservation = observation;
339
- queryBox.textContent = observation.query;
340
  budgetStat.textContent = observation.token_budget;
341
  usedStat.textContent = observation.total_tokens_used;
342
  stepStat.textContent = observation.step_number;
343
- feedbackBox.textContent = observation.last_action_feedback || "";
 
344
  observationPanel.textContent = JSON.stringify(observation, null, 2);
345
 
346
  const selected = new Set(observation.selected_chunks || []);
347
  chunkGrid.innerHTML = "";
348
- observation.available_chunks.forEach(chunk => {
349
  const card = document.createElement("div");
350
- card.className = "chunk" + (selected.has(chunk.chunk_id) ? " selected" : "");
351
- const pills = chunk.keywords.map(keyword => `<span class="pill">${keyword}</span>`).join("");
352
- const selectAction = selected.has(chunk.chunk_id)
353
- ? `<button class="action secondary" data-action="deselect" data-id="${chunk.chunk_id}">Deselect</button>`
354
- : `<button class="action secondary" data-action="select" data-id="${chunk.chunk_id}">Select</button>`;
355
  card.innerHTML = `
356
  <h4>${chunk.chunk_id}</h4>
357
- <div class="row"><span class="pill">${chunk.domain}</span><span class="pill">${chunk.tokens} tokens</span></div>
358
- <div style="margin-top:8px;">${pills}</div>
359
- <div class="row">
360
- ${selectAction}
361
- <button class="action secondary" data-action="compress" data-id="${chunk.chunk_id}">Compress 50%</button>
 
 
 
 
 
362
  </div>
363
  `;
364
  chunkGrid.appendChild(card);
365
  });
366
 
367
- chunkGrid.querySelectorAll("button[data-action]").forEach(btn => {
368
- btn.addEventListener("click", async () => {
369
- const action = btn.dataset.action;
370
- const chunkId = btn.dataset.id;
371
- if (action === "select") {
372
- await step({ action_type: "select_chunk", chunk_id: chunkId });
373
- } else if (action === "deselect") {
374
- await step({ action_type: "deselect_chunk", chunk_id: chunkId });
375
- } else if (action === "compress") {
376
- await step({ action_type: "compress_chunk", chunk_id: chunkId, compression_ratio: 0.5 });
377
  }
378
  });
379
  });
380
  }
381
 
382
  async function refreshState() {
383
- const state = await fetchJson("/state", { method: "GET" });
384
- statePanel.textContent = JSON.stringify(state, null, 2);
 
 
 
 
385
  }
386
 
387
  async function resetTask() {
388
- const body = await fetchJson("/reset", {
389
- method: "POST",
390
- body: JSON.stringify({ task_name: selectedTask }),
391
- });
392
- renderObservation(body.observation);
393
- await refreshState();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  }
395
 
396
  async function step(payload) {
 
 
 
 
397
  try {
398
  const body = await fetchJson("/step", {
399
  method: "POST",
400
- body: JSON.stringify(payload),
401
  });
402
  renderObservation(body.observation);
 
403
  if (body.info && body.info.grader_breakdown) {
404
- feedbackBox.textContent = "Final score: " + body.reward.toFixed(4) + " | " + JSON.stringify(body.info.grader_breakdown);
405
  }
406
  await refreshState();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  } catch (error) {
408
  feedbackBox.textContent = error.message;
409
  }
410
  }
411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  document.getElementById("resetBtn").addEventListener("click", resetTask);
 
 
413
  document.getElementById("refreshBtn").addEventListener("click", refreshState);
414
  document.getElementById("submitBtn").addEventListener("click", async () => {
415
- await step({ action_type: "submit_answer", answer: answerInput.value || "A concise answer synthesized from the selected evidence." });
 
416
  });
417
 
418
  (async function init() {
419
- const tasks = await fetchJson("/tasks", { method: "GET" });
420
- renderTasks(tasks);
421
- await resetTask();
 
 
 
 
422
  })();
423
  </script>
424
  </body>
@@ -436,7 +752,7 @@ async def log_requests(request: Request, call_next):
436
 
437
  @app.get("/", response_class=HTMLResponse)
438
  async def home_page():
439
- return HTMLResponse(UI_HTML)
440
 
441
 
442
  def _serialize_observation(observation: Any) -> dict[str, Any]:
@@ -475,11 +791,83 @@ def _is_bad_action_event(event: str | None) -> bool:
475
  }
476
 
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  @app.post("/reset")
479
  async def reset_endpoint(payload: ResetRequest):
480
  if payload.task_name not in TASKS_BY_NAME:
481
  raise HTTPException(status_code=400, detail="Unknown task_name.")
482
- env = RagContextOptimizerEnv(task_name=payload.task_name)
 
 
 
 
 
483
  app.state.env = env
484
  result = await env.reset()
485
  return _serialize_step_result(result, reset=True)
@@ -519,11 +907,21 @@ async def tasks_endpoint():
519
  "description": task.description,
520
  "difficulty": task.difficulty,
521
  "token_budget": task.token_budget,
 
 
522
  }
523
  for task in ALL_TASKS
524
  ]
525
 
526
 
 
 
 
 
 
 
 
 
527
  if __name__ == "__main__":
528
  import uvicorn
529
 
 
20
 
21
  class ResetRequest(BaseModel):
22
  task_name: Literal["single_domain_qa", "cross_domain_synthesis", "adversarial_compression"]
23
+ custom_query: str | None = None
24
+ token_budget: int | None = None
25
+ max_steps: int | None = None
26
 
27
 
28
  @asynccontextmanager
 
52
  UI_HTML = """
53
  <!doctype html>
54
  <html lang="en">
55
+ <head>
56
+ <meta charset="utf-8" />
57
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
58
+ <title>rag-context-optimizer</title>
59
+ <style>
60
+ :root { --bg:#f5f7fb; --panel:#fff; --ink:#132238; --muted:#64748b; --line:#dbe4f0; --accent:#0f766e; --accent2:#0284c7; }
61
+ * { box-sizing:border-box; }
62
+ body { margin:0; font-family:ui-sans-serif,system-ui,Segoe UI,sans-serif; color:var(--ink); background:linear-gradient(180deg,#f8fbff,var(--bg)); }
63
+ .wrap { max-width:1280px; margin:0 auto; padding:24px; }
64
+ .hero, .main, .panels { display:grid; gap:16px; }
65
+ .hero { grid-template-columns:1.2fr 0.8fr; margin-bottom:16px; }
66
+ .main { grid-template-columns:320px 1fr; }
67
+ .panels { grid-template-columns:1fr 1fr; margin-top:16px; }
68
+ .card { background:var(--panel); border:1px solid var(--line); border-radius:20px; padding:18px; box-shadow:0 10px 24px rgba(15,23,42,.05); }
69
+ h1,h2,h3,p { margin-top:0; }
70
+ .muted { color:var(--muted); line-height:1.5; }
71
+ .stats { display:grid; grid-template-columns:repeat(3,1fr); gap:12px; margin-top:14px; }
72
+ .stat { border:1px solid var(--line); border-radius:16px; padding:12px; }
73
+ .stat .k { font-size:.82rem; color:var(--muted); }
74
+ .stat .v { font-size:1.35rem; font-weight:700; margin-top:6px; }
75
+ .task-list { display:grid; gap:10px; }
76
+ .task-btn { width:100%; text-align:left; border:1px solid var(--line); border-radius:14px; padding:12px; background:#fff; cursor:pointer; }
77
+ .task-btn.active, .task-btn:hover { border-color:var(--accent2); box-shadow:0 6px 14px rgba(2,132,199,.12); }
78
+ .toolbar, .row { display:flex; gap:10px; flex-wrap:wrap; align-items:center; }
79
+ .toolbar { margin-bottom:12px; }
80
+ button, input, textarea { font:inherit; border-radius:12px; }
81
+ button { border:none; padding:10px 14px; cursor:pointer; font-weight:700; }
82
+ .primary { background:linear-gradient(135deg,var(--accent),var(--accent2)); color:#fff; }
83
+ .secondary { background:#fff; color:var(--ink); border:1px solid var(--line); }
84
+ input, textarea { width:100%; border:1px solid var(--line); padding:12px; }
85
+ textarea { min-height:96px; resize:vertical; }
86
+ .query { padding:14px; background:#f8fafc; border:1px solid var(--line); border-radius:16px; font-weight:600; margin-bottom:12px; }
87
+ .feedback { min-height:22px; color:#b45309; font-weight:600; }
88
+ .chunks { display:grid; grid-template-columns:repeat(auto-fill,minmax(250px,1fr)); gap:12px; }
89
+ .chunk { border:1px solid var(--line); border-radius:16px; padding:12px; background:#fff; }
90
+ .chunk.selected { border-color:var(--accent); background:#f0fdfa; }
91
+ .pill { display:inline-block; margin:0 6px 6px 0; padding:3px 8px; border-radius:999px; background:#eef6ff; color:#1d4ed8; font-size:.75rem; }
92
+ details { margin-top:16px; }
93
+ pre { white-space:pre-wrap; word-break:break-word; font-size:.86rem; background:#0f172a; color:#e2e8f0; padding:14px; border-radius:14px; overflow:auto; }
94
+ .small-input { width:160px; }
95
+ @media (max-width:980px) { .hero,.main,.panels,.stats { grid-template-columns:1fr; } .small-input { width:100%; } }
96
+ </style>
97
+ </head>
98
+ <body>
99
+ <div class="wrap">
100
+ <div class="hero">
101
+ <div class="card">
102
+ <h1>RAG Context Optimizer</h1>
103
+ <p class="muted">Use any prompt you want. The UI can reset a session with your custom query, token budget, and step limit, then help minimize token usage by selecting only high-value chunks and compressing heavy evidence before answering.</p>
104
+ <div class="stats">
105
+ <div class="stat"><div class="k">Token Budget</div><div class="v" id="budgetStat">-</div></div>
106
+ <div class="stat"><div class="k">Tokens Used</div><div class="v" id="usedStat">-</div></div>
107
+ <div class="stat"><div class="k">Step</div><div class="v" id="stepStat">-</div></div>
108
+ </div>
109
+ </div>
110
+ <div class="card">
111
+ <h3>How To Use It</h3>
112
+ <p class="muted">Pick a task as a corpus shape, then overwrite the prompt with anything you want. Use Auto Optimize to choose chunks by relevance-per-token, or run manually. The API endpoints remain available at <code>/reset</code>, <code>/step</code>, and <code>/state</code>.</p>
113
+ <div class="row">
114
+ <a href="/docs" target="_blank"><button class="secondary" type="button">API Docs</button></a>
115
+ <a href="/health" target="_blank"><button class="secondary" type="button">Health</button></a>
116
+ </div>
117
+ </div>
118
+ </div>
119
+
120
+ <div class="main">
121
+ <aside class="card">
122
+ <h3>Task Presets</h3>
123
+ <div id="taskList" class="task-list"></div>
124
+ </aside>
125
+
126
+ <section class="card">
127
+ <div class="toolbar">
128
+ <button id="resetBtn" class="primary" type="button">Start / Reset</button>
129
+ <button id="autoStepBtn" class="secondary" type="button">Auto Optimize Step</button>
130
+ <button id="autoRunBtn" class="secondary" type="button">Auto Run</button>
131
+ <button id="refreshBtn" class="secondary" type="button">Refresh State</button>
132
+ </div>
133
+ <div class="row">
134
+ <input id="tokenBudgetInput" class="small-input" type="number" min="50" step="10" placeholder="Token budget" />
135
+ <input id="maxStepsInput" class="small-input" type="number" min="1" step="1" placeholder="Max steps" />
136
+ </div>
137
+ <div style="margin-top:12px;">
138
+ <label for="customQueryInput"><strong>Prompt / Query</strong></label>
139
+ <textarea id="customQueryInput" placeholder="Enter any prompt you want to optimize for minimal token usage."></textarea>
140
+ </div>
141
+ <div id="queryBox" class="query">Set a prompt and press Start / Reset.</div>
142
+ <div id="feedbackBox" class="feedback"></div>
143
+
144
+ <h3>Available Chunks</h3>
145
+ <div id="chunkGrid" class="chunks"></div>
146
+
147
+ <h3 style="margin-top:18px;">Answer</h3>
148
+ <textarea id="answerInput" placeholder="Write your final answer here."></textarea>
149
+ <div class="row" style="margin-top:10px;">
150
+ <button id="submitBtn" class="primary" type="button">Submit Answer</button>
151
+ </div>
152
+
153
+ <div class="panels">
154
+ <details class="card" open>
155
+ <summary><strong>Observation</strong></summary>
156
+ <pre id="observationPanel">{}</pre>
157
+ </details>
158
+ <details class="card">
159
+ <summary><strong>State</strong></summary>
160
+ <pre id="statePanel">{}</pre>
161
+ </details>
162
+ </div>
163
+ </section>
164
+ </div>
165
+ </div>
166
+ <script>
167
+ const taskList = document.getElementById("taskList");
168
+ const chunkGrid = document.getElementById("chunkGrid");
169
+ const queryBox = document.getElementById("queryBox");
170
+ const observationPanel = document.getElementById("observationPanel");
171
+ const statePanel = document.getElementById("statePanel");
172
+ const feedbackBox = document.getElementById("feedbackBox");
173
+ const budgetStat = document.getElementById("budgetStat");
174
+ const usedStat = document.getElementById("usedStat");
175
+ const stepStat = document.getElementById("stepStat");
176
+ const answerInput = document.getElementById("answerInput");
177
+ const customQueryInput = document.getElementById("customQueryInput");
178
+ const tokenBudgetInput = document.getElementById("tokenBudgetInput");
179
+ const maxStepsInput = document.getElementById("maxStepsInput");
180
+ let selectedTask = "single_domain_qa";
181
+ let currentObservation = null;
182
+ let allTasks = [];
183
+
184
+ async function fetchJson(url, options = {}) {
185
+ const response = await fetch(url, { headers: { "Content-Type": "application/json" }, ...options });
186
+ const body = await response.json();
187
+ if (!response.ok) throw new Error(body.detail || JSON.stringify(body));
188
+ return body;
189
+ }
190
+
191
+ function syncInputsFromTask() {
192
+ const task = allTasks.find(item => item.name === selectedTask);
193
+ if (!task) return;
194
+ if (!customQueryInput.dataset.userEdited || !customQueryInput.value.trim()) customQueryInput.value = task.query;
195
+ tokenBudgetInput.value = task.token_budget;
196
+ maxStepsInput.value = task.max_steps;
197
+ }
198
+
199
+ function renderTasks(tasks) {
200
+ taskList.innerHTML = "";
201
+ tasks.forEach(task => {
202
+ const btn = document.createElement("button");
203
+ btn.className = "task-btn" + (task.name === selectedTask ? " active" : "");
204
+ btn.innerHTML = `<div><strong>${task.name}</strong></div><div class="muted">${task.difficulty} · budget ${task.token_budget} · max steps ${task.max_steps}</div><div class="muted">${task.description}</div>`;
205
+ btn.onclick = () => {
206
+ selectedTask = task.name;
207
+ customQueryInput.dataset.userEdited = "";
208
+ renderTasks(tasks);
209
+ syncInputsFromTask();
210
+ };
211
+ taskList.appendChild(btn);
212
+ });
213
+ }
214
+
215
+ function renderObservation(observation) {
216
+ currentObservation = observation;
217
+ queryBox.textContent = observation.query;
218
+ budgetStat.textContent = observation.token_budget;
219
+ usedStat.textContent = observation.total_tokens_used;
220
+ stepStat.textContent = observation.step_number;
221
+ feedbackBox.textContent = observation.last_action_feedback || "";
222
+ observationPanel.textContent = JSON.stringify(observation, null, 2);
223
+ const selected = new Set(observation.selected_chunks || []);
224
+ chunkGrid.innerHTML = "";
225
+ observation.available_chunks.forEach(chunk => {
226
+ const card = document.createElement("div");
227
+ card.className = "chunk" + (selected.has(chunk.chunk_id) ? " selected" : "");
228
+ card.innerHTML = `
229
+ <h4>${chunk.chunk_id}</h4>
230
+ <div class="row"><span class="pill">${chunk.domain}</span><span class="pill">${chunk.tokens} tokens</span></div>
231
+ <div>${chunk.keywords.map(keyword => `<span class="pill">${keyword}</span>`).join("")}</div>
232
+ <div class="row" style="margin-top:10px;">
233
+ ${selected.has(chunk.chunk_id)
234
+ ? `<button class="secondary" data-action="deselect" data-id="${chunk.chunk_id}" type="button">Deselect</button>`
235
+ : `<button class="secondary" data-action="select" data-id="${chunk.chunk_id}" type="button">Select</button>`}
236
+ <button class="secondary" data-action="compress" data-id="${chunk.chunk_id}" type="button">Compress 50%</button>
237
+ </div>
238
+ `;
239
+ chunkGrid.appendChild(card);
240
+ });
241
+ chunkGrid.querySelectorAll("button[data-action]").forEach(btn => {
242
+ btn.addEventListener("click", async () => {
243
+ const action = btn.dataset.action;
244
+ const chunkId = btn.dataset.id;
245
+ if (action === "select") await step({ action_type: "select_chunk", chunk_id: chunkId });
246
+ if (action === "deselect") await step({ action_type: "deselect_chunk", chunk_id: chunkId });
247
+ if (action === "compress") await step({ action_type: "compress_chunk", chunk_id: chunkId, compression_ratio: 0.5 });
248
+ });
249
+ });
250
+ }
251
+
252
+ async function refreshState() {
253
+ const state = await fetchJson("/state", { method: "GET" });
254
+ statePanel.textContent = JSON.stringify(state, null, 2);
255
+ }
256
+
257
+ async function resetTask() {
258
+ try {
259
+ const body = await fetchJson("/reset", {
260
+ method: "POST",
261
+ body: JSON.stringify({
262
+ task_name: selectedTask,
263
+ custom_query: customQueryInput.value,
264
+ token_budget: tokenBudgetInput.value ? Number(tokenBudgetInput.value) : null,
265
+ max_steps: maxStepsInput.value ? Number(maxStepsInput.value) : null
266
+ }),
267
+ });
268
+ renderObservation(body.observation);
269
+ await refreshState();
270
+ } catch (error) {
271
+ feedbackBox.textContent = error.message;
272
+ }
273
+ }
274
+
275
+ async function step(payload) {
276
+ try {
277
+ const body = await fetchJson("/step", { method: "POST", body: JSON.stringify(payload) });
278
+ renderObservation(body.observation);
279
+ if (body.info && body.info.grader_breakdown) {
280
+ feedbackBox.textContent = `Final score: ${Number(body.reward).toFixed(4)} | ${JSON.stringify(body.info.grader_breakdown)}`;
281
+ }
282
+ await refreshState();
283
+ } catch (error) {
284
+ feedbackBox.textContent = error.message;
285
+ }
286
+ }
287
+
288
+ async function optimizeStep() {
289
+ try {
290
+ const suggestion = await fetchJson("/optimize-step", { method: "POST" });
291
+ feedbackBox.textContent = "Optimizer suggestion: " + JSON.stringify(suggestion);
292
+ await step(suggestion);
293
+ } catch (error) {
294
+ feedbackBox.textContent = error.message;
295
+ }
296
+ }
297
+
298
+ async function autoRun() {
299
+ for (let i = 0; i < 12; i += 1) {
300
+ if (!currentObservation) break;
301
+ const suggestion = await fetchJson("/optimize-step", { method: "POST" });
302
+ await step(suggestion);
303
+ if (suggestion.action_type === "submit_answer") break;
304
+ }
305
+ }
306
+
307
+ document.getElementById("resetBtn").addEventListener("click", resetTask);
308
+ document.getElementById("autoStepBtn").addEventListener("click", optimizeStep);
309
+ document.getElementById("autoRunBtn").addEventListener("click", autoRun);
310
+ document.getElementById("refreshBtn").addEventListener("click", refreshState);
311
+ document.getElementById("submitBtn").addEventListener("click", async () => {
312
+ await step({ action_type: "submit_answer", answer: answerInput.value || "A concise answer synthesized from the selected evidence." });
313
+ });
314
+ customQueryInput.addEventListener("input", () => { customQueryInput.dataset.userEdited = "true"; });
315
+
316
+ (async function init() {
317
+ allTasks = await fetchJson("/tasks", { method: "GET" });
318
+ renderTasks(allTasks);
319
+ syncInputsFromTask();
320
+ await resetTask();
321
+ })();
322
+ </script>
323
+ </body>
324
+ </html>
325
+ """
326
+
327
+
328
+ UI_HTML_V2 = """
329
+ <!doctype html>
330
+ <html lang="en">
331
  <head>
332
  <meta charset="utf-8" />
333
  <meta name="viewport" content="width=device-width, initial-scale=1" />
334
  <title>rag-context-optimizer</title>
335
  <style>
336
  :root {
337
+ --bg: #f3f7fb;
338
  --panel: #ffffff;
339
+ --ink: #10243c;
340
+ --muted: #61748a;
341
+ --line: #d8e2ef;
342
  --accent: #0f766e;
343
+ --accent-alt: #0369a1;
344
  --warn: #b45309;
345
+ --selected: #ecfeff;
346
  }
347
  * { box-sizing: border-box; }
348
  body {
349
  margin: 0;
350
  font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
351
+ background: radial-gradient(circle at top left, #ffffff 0%, var(--bg) 65%);
352
  color: var(--ink);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  }
354
+ .wrap { max-width: 1360px; margin: 0 auto; padding: 24px; }
355
+ .grid { display: grid; grid-template-columns: 320px 1fr; gap: 16px; }
356
+ .stack { display: grid; gap: 16px; }
357
  .card {
358
  background: var(--panel);
359
  border: 1px solid var(--line);
360
+ border-radius: 22px;
361
  padding: 18px;
362
+ box-shadow: 0 10px 24px rgba(15, 23, 42, 0.05);
363
  }
364
  h1, h2, h3, p { margin-top: 0; }
365
+ .muted { color: var(--muted); line-height: 1.5; }
366
+ .stats { display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-top: 16px; }
367
+ .stat { border: 1px solid var(--line); border-radius: 16px; padding: 12px; background: #fbfdff; }
368
+ .stat .label { font-size: 0.8rem; color: var(--muted); }
369
+ .stat .value { margin-top: 6px; font-size: 1.35rem; font-weight: 700; }
370
+ .task-list, .chunk-grid { display: grid; gap: 12px; }
371
+ .chunk-grid { grid-template-columns: repeat(auto-fill, minmax(255px, 1fr)); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  .task-btn {
373
  width: 100%;
374
  text-align: left;
375
  border: 1px solid var(--line);
376
+ border-radius: 16px;
377
+ padding: 12px;
378
  background: #fff;
 
 
379
  cursor: pointer;
380
+ font: inherit;
381
  }
382
  .task-btn:hover, .task-btn.active {
383
+ border-color: var(--accent-alt);
384
+ box-shadow: 0 8px 18px rgba(3, 105, 161, 0.10);
 
 
 
 
 
 
 
 
 
385
  }
386
+ .toolbar, .row { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
387
+ button, input, textarea { border-radius: 12px; font: inherit; }
388
+ button {
 
 
 
 
 
 
389
  cursor: pointer;
390
+ padding: 10px 14px;
391
+ border: 1px solid transparent;
392
  font-weight: 700;
393
  }
394
+ .primary { background: linear-gradient(135deg, var(--accent), var(--accent-alt)); color: #fff; }
395
+ .secondary { background: #fff; border-color: var(--line); color: var(--ink); }
396
+ input, textarea { width: 100%; border: 1px solid var(--line); padding: 12px; background: #fff; }
397
+ textarea { resize: vertical; min-height: 110px; }
398
+ .query-box {
 
 
 
399
  padding: 14px;
 
 
 
 
 
 
 
 
 
 
 
400
  border: 1px solid var(--line);
401
  border-radius: 16px;
402
+ background: #f8fafc;
403
+ font-weight: 600;
404
+ min-height: 60px;
 
 
 
405
  }
406
+ .feedback { min-height: 24px; color: var(--warn); font-weight: 600; }
407
+ .chunk-card { border: 1px solid var(--line); border-radius: 18px; padding: 12px; background: #fff; }
408
+ .chunk-card.selected { border-color: var(--accent); background: var(--selected); }
409
  .pill {
410
  display: inline-block;
411
+ margin: 0 6px 6px 0;
412
+ padding: 4px 8px;
413
  border-radius: 999px;
414
+ background: #eff6ff;
 
415
  color: #1d4ed8;
416
  font-size: 0.75rem;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  }
418
+ .json-panels { display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-top: 16px; }
419
  pre {
420
  margin: 0;
421
  white-space: pre-wrap;
422
  word-break: break-word;
423
+ font-size: 0.86rem;
424
+ line-height: 1.45;
425
  background: #0f172a;
426
  color: #e2e8f0;
427
  padding: 14px;
428
+ border-radius: 14px;
429
+ overflow: auto;
430
+ max-height: 420px;
 
 
 
 
431
  }
432
+ .helper-list { padding-left: 18px; margin: 0; color: var(--muted); }
433
+ .helper-list li + li { margin-top: 8px; }
434
+ .mono { font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace; }
435
+ @media (max-width: 980px) { .grid, .json-panels, .stats { grid-template-columns: 1fr; } }
436
  </style>
437
  </head>
438
  <body>
439
+ <div class="wrap">
440
+ <div class="card" style="margin-bottom:16px;">
441
+ <h1>RAG Context Optimizer</h1>
442
+ <p class="muted">
443
+ This UI works with any prompt you provide. Pick a preset only to choose a corpus shape and default budget, then replace the query with your own prompt and let the optimizer choose the highest-value evidence for the fewest tokens possible.
444
+ </p>
445
+ <div class="stats">
446
+ <div class="stat"><div class="label">Task Preset</div><div class="value" id="taskStat">-</div></div>
447
+ <div class="stat"><div class="label">Token Budget</div><div class="value" id="budgetStat">-</div></div>
448
+ <div class="stat"><div class="label">Tokens Used</div><div class="value" id="usedStat">-</div></div>
449
+ <div class="stat"><div class="label">Step</div><div class="value" id="stepStat">-</div></div>
 
 
 
450
  </div>
451
+ </div>
452
+
453
+ <div class="grid">
454
+ <aside class="stack">
455
+ <div class="card">
456
+ <h3>Task Presets</h3>
457
+ <p class="muted">These only set the default corpus flavor, budget, and step limit. Your prompt can be anything.</p>
458
+ <div id="taskList" class="task-list"></div>
459
+ </div>
460
+ <div class="card">
461
+ <h3>How It Optimizes</h3>
462
+ <ul class="helper-list">
463
+ <li>Ranks chunks by relevance per token, not just raw overlap.</li>
464
+ <li>Compresses heavier evidence before wasting budget.</li>
465
+ <li>Stops early once it has enough support to answer.</li>
466
+ <li>Keeps the OpenEnv endpoints live at <span class="mono">/reset</span>, <span class="mono">/step</span>, and <span class="mono">/state</span>.</li>
467
+ </ul>
468
+ <div class="toolbar" style="margin-top:16px;">
469
+ <a href="/docs" target="_blank"><button class="secondary" type="button">API Docs</button></a>
470
+ <a href="/health" target="_blank"><button class="secondary" type="button">Health</button></a>
471
+ </div>
472
  </div>
 
 
 
 
 
 
 
473
  </aside>
474
 
475
+ <section class="stack">
476
+ <div class="card">
477
+ <div class="toolbar" style="margin-bottom:12px;">
478
+ <button id="resetBtn" class="primary" type="button">Start / Reset</button>
479
+ <button id="autoStepBtn" class="secondary" type="button">Auto Optimize Step</button>
480
+ <button id="autoRunBtn" class="secondary" type="button">Auto Run</button>
481
+ <button id="refreshBtn" class="secondary" type="button">Refresh State</button>
482
+ </div>
483
+ <div class="row">
484
+ <input id="tokenBudgetInput" type="number" min="50" step="10" placeholder="Token budget" style="max-width:180px;" />
485
+ <input id="maxStepsInput" type="number" min="1" step="1" placeholder="Max steps" style="max-width:180px;" />
486
+ </div>
487
+ <div style="margin-top:12px;">
488
+ <label for="customQueryInput"><strong>Custom Prompt</strong></label>
489
+ <textarea id="customQueryInput" placeholder="Enter any prompt you want to optimize for minimal token usage."></textarea>
490
+ </div>
491
+ <div style="margin-top:12px;">
492
+ <label for="answerInput"><strong>Manual Final Answer</strong></label>
493
+ <textarea id="answerInput" placeholder="Optional. If left blank, auto-optimize will still submit an answer."></textarea>
494
+ </div>
495
+ <div class="toolbar" style="margin-top:12px;">
496
+ <button id="submitBtn" class="primary" type="button">Submit Manual Answer</button>
497
+ </div>
498
  </div>
 
 
499
 
500
+ <div class="card">
501
+ <h3>Active Query</h3>
502
+ <div id="queryBox" class="query-box">No episode has started yet. Enter your prompt, then press Start / Reset.</div>
503
+ <div id="feedbackBox" class="feedback" style="margin-top:12px;"></div>
504
+ </div>
505
 
506
+ <div class="card">
507
+ <div class="toolbar" style="justify-content:space-between;">
508
+ <h3 style="margin-bottom:0;">Available Chunks</h3>
509
+ <div class="muted">Select manually or let Auto Optimize choose the best evidence per token.</div>
510
+ </div>
511
+ <div id="chunkGrid" class="chunk-grid" style="margin-top:12px;"></div>
512
  </div>
513
 
514
+ <div class="json-panels">
515
+ <div class="card">
516
  <h3>Observation</h3>
517
  <pre id="observationPanel">{}</pre>
518
  </div>
519
+ <div class="card">
520
  <h3>State</h3>
521
  <pre id="statePanel">{}</pre>
522
  </div>
523
  </div>
524
+ </section>
525
+ </div>
526
  </div>
 
527
  <script>
528
  const taskList = document.getElementById("taskList");
529
  const chunkGrid = document.getElementById("chunkGrid");
 
534
  const budgetStat = document.getElementById("budgetStat");
535
  const usedStat = document.getElementById("usedStat");
536
  const stepStat = document.getElementById("stepStat");
537
+ const taskStat = document.getElementById("taskStat");
538
  const answerInput = document.getElementById("answerInput");
539
+ const customQueryInput = document.getElementById("customQueryInput");
540
+ const tokenBudgetInput = document.getElementById("tokenBudgetInput");
541
+ const maxStepsInput = document.getElementById("maxStepsInput");
542
+
543
  let selectedTask = "single_domain_qa";
544
  let currentObservation = null;
545
+ let currentDone = false;
546
+ let allTasks = [];
547
 
548
// Fetch `url` and return its parsed JSON body.
// A JSON Content-Type header is sent by default; `options` is spread afterwards,
// so any key the caller supplies (including `headers`) replaces the default.
// Throws an Error with a readable message when the response status is not OK.
async function fetchJson(url, options = {}) {
  const response = await fetch(url, {
    headers: { "Content-Type": "application/json" },
    ...options
  });

  let body = null;
  try {
    body = await response.json();
  } catch (parseError) {
    // A successful response that is not JSON is still an error for callers.
    // For a failed response we fall through and report the HTTP status instead
    // of the parser's message.
    if (response.ok) throw parseError;
  }

  if (!response.ok) {
    const detail = body ? body.detail : null;
    let message;
    if (typeof detail === "string" && detail) {
      message = detail;
    } else if (detail) {
      // Structured details (for example a list of validation errors) would
      // otherwise be shown as "[object Object]".
      message = JSON.stringify(detail);
    } else if (body) {
      message = JSON.stringify(body);
    } else {
      message = `Request failed with status ${response.status}`;
    }
    throw new Error(message);
  }
  return body;
}
557
 
558
// Return the preset record whose name equals `selectedTask`,
// or undefined when no loaded preset matches.
function getSelectedTaskMeta() {
  for (const candidate of allTasks) {
    if (candidate.name === selectedTask) return candidate;
  }
  return undefined;
}
561
+
562
// Copy the selected preset's token budget and step limit into the two
// number inputs. Does nothing when the preset is not found.
function syncInputsFromTask() {
  const preset = getSelectedTaskMeta();
  if (preset) {
    tokenBudgetInput.value = preset.token_budget;
    maxStepsInput.value = preset.max_steps;
  }
}
568
+
569
// Rebuild the preset list: one button per task, with the currently selected
// task marked "active". Clicking a button selects that preset, re-renders the
// list, refreshes the budget/step inputs and updates the header stat.
function renderTasks(tasks) {
  taskList.innerHTML = "";
  for (const task of tasks) {
    const button = document.createElement("button");
    button.type = "button";
    button.className = "task-btn";
    if (task.name === selectedTask) button.classList.add("active");
    button.innerHTML = `
      <div><strong>${task.name}</strong></div>
      <div class="muted">${task.difficulty} | budget ${task.token_budget} | max steps ${task.max_steps}</div>
      <div class="muted">${task.description}</div>
    `;

    const choosePreset = () => {
      selectedTask = task.name;
      renderTasks(tasks);
      syncInputsFromTask();
      taskStat.textContent = task.name;
      feedbackBox.textContent = "Preset changed. Add your prompt and press Start / Reset.";
    };
    button.addEventListener("click", choosePreset);

    taskList.appendChild(button);
  }
}
590
 
591
// Render one observation: header stats, active query, feedback line, the raw
// JSON panel, and one card per available chunk with Select/Deselect and
// Compress buttons. Also remembers the observation in `currentObservation`.
function renderObservation(observation) {
  currentObservation = observation;
  queryBox.textContent = observation.query || "No active query.";
  budgetStat.textContent = observation.token_budget;
  usedStat.textContent = observation.total_tokens_used;
  stepStat.textContent = observation.step_number;
  taskStat.textContent = observation.task_name;
  if (observation.last_action_feedback) feedbackBox.textContent = observation.last_action_feedback;
  observationPanel.textContent = JSON.stringify(observation, null, 2);

  const selected = new Set(observation.selected_chunks || []);
  // Tolerate a missing chunk list the same way selected_chunks and keywords
  // are tolerated, instead of throwing halfway through the render.
  const chunks = observation.available_chunks || [];

  chunkGrid.innerHTML = "";
  chunks.forEach((chunk) => {
    const isSelected = selected.has(chunk.chunk_id);
    const toggleAction = isSelected ? "deselect_chunk" : "select_chunk";
    const toggleLabel = isSelected ? "Deselect" : "Select";
    const keywordHtml = (chunk.keywords || [])
      .map((keyword) => `<span class="pill">${keyword}</span>`)
      .join("");

    const card = document.createElement("div");
    card.className = "chunk-card" + (isSelected ? " selected" : "");
    card.innerHTML = `
      <h4>${chunk.chunk_id}</h4>
      <div style="margin-bottom:8px;">
        <span class="pill">${chunk.domain}</span>
        <span class="pill">${chunk.tokens} tokens</span>
      </div>
      <div>${keywordHtml}</div>
      <div class="toolbar" style="margin-top:12px;">
        <button class="secondary" type="button" data-action="${toggleAction}" data-id="${chunk.chunk_id}">${toggleLabel}</button>
        <button class="secondary" type="button" data-action="compress_chunk" data-id="${chunk.chunk_id}">Compress 50%</button>
      </div>
    `;
    chunkGrid.appendChild(card);
  });

  // Each button carries its action and chunk id in data attributes; the
  // compress action additionally sends a fixed 0.5 compression ratio.
  chunkGrid.querySelectorAll("button[data-action]").forEach((button) => {
    button.addEventListener("click", async () => {
      const actionType = button.dataset.action;
      const chunkId = button.dataset.id;
      if (actionType === "compress_chunk") {
        await step({ action_type: actionType, chunk_id: chunkId, compression_ratio: 0.5 });
      } else {
        await step({ action_type: actionType, chunk_id: chunkId });
      }
    });
  });
}
636
 
637
// Fetch /state and show it in the State panel; on failure show the error
// message in the same panel instead.
async function refreshState() {
  let snapshot;
  try {
    snapshot = await fetchJson("/state", { method: "GET" });
  } catch (error) {
    snapshot = { error: error.message };
  }
  statePanel.textContent = JSON.stringify(snapshot, null, 2);
}
645
 
646
// Start a new episode for the selected preset using the custom prompt and the
// optional budget / step overrides. Empty override inputs are sent as null.
// Requires a non-empty prompt; errors are shown in the feedback line.
async function resetTask() {
  const prompt = customQueryInput.value.trim();
  if (prompt === "") {
    feedbackBox.textContent = "Please enter your own prompt first.";
    return;
  }

  const budgetText = tokenBudgetInput.value;
  const stepsText = maxStepsInput.value;
  const requestBody = {
    task_name: selectedTask,
    custom_query: prompt,
    token_budget: budgetText ? Number(budgetText) : null,
    max_steps: stepsText ? Number(stepsText) : null
  };

  try {
    const body = await fetchJson("/reset", {
      method: "POST",
      body: JSON.stringify(requestBody)
    });
    currentDone = false;
    renderObservation(body.observation);
    feedbackBox.textContent = "Episode started with your custom prompt.";
    await refreshState();
  } catch (error) {
    feedbackBox.textContent = error.message;
  }
}
670
 
671
// Send one action to /step, re-render the returned observation and refresh
// the State panel. Returns the response body, or null when no episode is
// active or the request fails (the error text goes to the feedback line).
async function step(payload) {
  if (!currentObservation) {
    feedbackBox.textContent = "Start an episode first.";
    return null;
  }

  let body;
  try {
    body = await fetchJson("/step", { method: "POST", body: JSON.stringify(payload) });
    renderObservation(body.observation);
    currentDone = Boolean(body.done);

    // A grader breakdown is only reported with the final score.
    const breakdown = body.info && body.info.grader_breakdown;
    if (breakdown) {
      const score = Number(body.reward || 0).toFixed(4);
      feedbackBox.textContent = `Final score ${score} | ${JSON.stringify(breakdown)}`;
    }

    await refreshState();
  } catch (error) {
    feedbackBox.textContent = error.message;
    return null;
  }
  return body;
}
693
+
694
// Ask the server for one suggested action, show it, then execute it.
// Needs an active episode; request errors go to the feedback line.
async function optimizeStep() {
  if (!currentObservation) {
    feedbackBox.textContent = "Start an episode first.";
    return;
  }
  try {
    const proposed = await fetchJson("/optimize-step", { method: "POST" });
    const summary = JSON.stringify(proposed);
    feedbackBox.textContent = "Optimizer chose: " + summary;
    await step(proposed);
  } catch (error) {
    feedbackBox.textContent = error.message;
  }
}
707
 
708
// Repeatedly request and execute suggested actions until the episode ends,
// the optimizer submits an answer, a step fails, or the iteration cap is hit.
async function autoRun() {
  if (!currentObservation) {
    feedbackBox.textContent = "Start an episode first.";
    return;
  }

  // Hard cap so a misbehaving optimizer cannot loop forever.
  const maxIterations = 20;
  try {
    for (let index = 0; index < maxIterations && !currentDone; index += 1) {
      const suggestion = await fetchJson("/optimize-step", { method: "POST" });
      const result = await step(suggestion);
      if (!result || result.done || suggestion.action_type === "submit_answer") break;
    }
  } catch (error) {
    // A failed /optimize-step request would otherwise be an unhandled
    // rejection with nothing shown to the user.
    feedbackBox.textContent = error.message;
  }
}
720
+
721
// Wire each toolbar button (by element id) to its click handler.
{
  const clickHandlers = {
    resetBtn: resetTask,
    autoStepBtn: optimizeStep,
    autoRunBtn: autoRun,
    refreshBtn: refreshState,
    // Manual submit falls back to a generic answer when the box is empty.
    submitBtn: async () => {
      const typed = answerInput.value.trim();
      const manualAnswer = typed || "Concise answer synthesized from the selected evidence.";
      await step({ action_type: "submit_answer", answer: manualAnswer });
    }
  };
  for (const [elementId, handler] of Object.entries(clickHandlers)) {
    document.getElementById(elementId).addEventListener("click", handler);
  }
}
729
 
730
// Page start-up: show the idle placeholders, then load the task presets and
// fill the preset list and the budget/step inputs.
(async function init() {
  const idlePanel = JSON.stringify({ message: "No active episode yet." }, null, 2);
  observationPanel.textContent = idlePanel;
  statePanel.textContent = idlePanel;
  taskStat.textContent = selectedTask;

  try {
    allTasks = await fetchJson("/tasks", { method: "GET" });
  } catch (error) {
    // Without this the failure is an unhandled rejection and the page stays
    // blank with no explanation.
    feedbackBox.textContent = `Could not load task presets: ${error.message}`;
    return;
  }

  renderTasks(allTasks);
  syncInputsFromTask();
  feedbackBox.textContent = "Add any prompt you want, then press Start / Reset.";
})();
739
  </script>
740
  </body>
 
752
 
753
@app.get("/", response_class=HTMLResponse)
async def home_page():
    """Serve the browser UI by returning the ``UI_HTML_V2`` markup as HTML."""
    page = HTMLResponse(UI_HTML_V2)
    return page
756
 
757
 
758
  def _serialize_observation(observation: Any) -> dict[str, Any]:
 
791
  }
792
 
793
 
794
def _tokenize(text: str) -> set[str]:
    """Return the distinct lowercase ASCII alphanumeric runs found in ``text``.

    The text is lowercased first; any character outside ``a-z`` / ``0-9``
    acts as a separator.
    """
    import re

    lowered = text.lower()
    return {match.group(0) for match in re.finditer(r"[a-z0-9]+", lowered)}
798
+
799
+
800
def _suggest_action(env: RagContextOptimizerEnv) -> dict[str, Any]:
    """Choose the next action payload for ``env`` from its current observation.

    Decision order:

    1. Compress the heaviest selected chunk when something is selected, the
       episode is at 65% of budget or at step 3+, and that chunk is larger
       than ``max(120, budget // 4)`` tokens.
    2. Submit an answer once two chunks are selected or the step counter
       reaches ``max(2, max_steps - 2)``.
    3. Otherwise select the unselected chunk with the best keyword-overlap
       per token that still fits in the remaining budget.
    4. If nothing fits, submit with an explanatory fallback answer.

    Returns:
        A dict with an ``action_type`` key plus the fields that action needs.
    """
    obs = env._build_observation()
    budget = obs.token_budget
    spent = obs.total_tokens_used
    step_no = obs.step_number
    query_terms = _tokenize(obs.query)
    picked_ids = set(obs.selected_chunks)

    # Split the visible chunks into already-selected and still-selectable,
    # keeping the observation's ordering in both lists.
    picked: list[Any] = []
    unpicked: list[Any] = []
    for chunk in obs.available_chunks:
        (picked if chunk.chunk_id in picked_ids else unpicked).append(chunk)

    # 1. Compress the largest selected chunk when budget or steps are tight.
    if picked and (spent >= int(budget * 0.65) or step_no >= 3):
        heaviest = min(picked, key=lambda chunk: (-chunk.tokens, chunk.chunk_id))
        if heaviest.tokens > max(120, budget // 4):
            return {
                "action_type": "compress_chunk",
                "chunk_id": heaviest.chunk_id,
                "compression_ratio": 0.5,
            }

    # 2. Enough evidence, or running out of steps: submit.
    if len(picked_ids) >= 2 or step_no >= max(2, env.task.max_steps - 2):
        # Up to two keywords from each of the first three selected chunks.
        keywords = [kw for chunk in picked[:3] for kw in chunk.keywords[:2]]
        if keywords:
            answer = "Optimized answer based on selected evidence: " + ", ".join(keywords[:6])
        else:
            answer = "Optimized answer based on the currently selected evidence."
        return {"action_type": "submit_answer", "answer": answer}

    def efficiency_key(chunk: Any) -> tuple[float, int, str]:
        # Jaccard overlap between query terms and keyword terms, divided by
        # the chunk's token cost; negated so the best ratio sorts first.
        keyword_terms = _tokenize(" ".join(chunk.keywords))
        denominator = len(query_terms | keyword_terms) or 1
        relevance = len(query_terms & keyword_terms) / denominator
        return (-(relevance / max(chunk.tokens, 1)), chunk.tokens, chunk.chunk_id)

    # 3. Best-ranked unselected chunk that fits in what is left of the budget.
    headroom = budget - spent
    best_fit = next(
        (chunk for chunk in sorted(unpicked, key=efficiency_key) if chunk.tokens <= headroom),
        None,
    )
    if best_fit is not None:
        return {"action_type": "select_chunk", "chunk_id": best_fit.chunk_id}

    # 4. Nothing more fits.
    if picked:
        return {
            "action_type": "submit_answer",
            "answer": "Optimized answer based on the currently selected evidence.",
        }
    if unpicked:
        smallest_chunk = min(unpicked, key=lambda chunk: (chunk.tokens, chunk.chunk_id))
        return {
            "action_type": "submit_answer",
            "answer": (
                "No chunk fits within the current token budget. "
                f"Increase the budget to at least {smallest_chunk.tokens} tokens or choose a broader budget."
            ),
        }
    return {"action_type": "submit_answer", "answer": "No usable evidence was available."}
859
+
860
+
861
@app.post("/reset")
async def reset_endpoint(payload: ResetRequest):
    """Create a fresh environment for the requested task and start an episode.

    The optional custom query, token budget and step limit from the request
    are passed to the environment as overrides.

    Raises:
        HTTPException: 400 when ``payload.task_name`` is not a known task.
    """
    if payload.task_name not in TASKS_BY_NAME:
        raise HTTPException(status_code=400, detail="Unknown task_name.")
    env = RagContextOptimizerEnv(
        task_name=payload.task_name,
        query_override=payload.custom_query,
        token_budget_override=payload.token_budget,
        max_steps_override=payload.max_steps,
    )
    result = await env.reset()
    # Publish the environment only after reset() has succeeded, so a failed
    # reset cannot replace a working episode with an un-reset environment.
    app.state.env = env
    return _serialize_step_result(result, reset=True)
 
907
  "description": task.description,
908
  "difficulty": task.difficulty,
909
  "token_budget": task.token_budget,
910
+ "query": task.query,
911
+ "max_steps": task.max_steps,
912
  }
913
  for task in ALL_TASKS
914
  ]
915
 
916
 
917
@app.post("/optimize-step")
async def optimize_step_endpoint():
    """Return the optimizer's suggested next action for the active environment.

    Raises:
        HTTPException: 400 when no environment has been stored on the app yet.
    """
    active_env = getattr(app.state, "env", None)
    if active_env is not None:
        return _suggest_action(active_env)
    raise HTTPException(
        status_code=400,
        detail="Environment is not initialized. Call /reset first.",
    )
923
+
924
+
925
  if __name__ == "__main__":
926
  import uvicorn
927
 
env/__pycache__/environment.cpython-314.pyc CHANGED
Binary files a/env/__pycache__/environment.cpython-314.pyc and b/env/__pycache__/environment.cpython-314.pyc differ
 
env/environment.py CHANGED
@@ -4,7 +4,7 @@ Main OpenEnv-style environment for rag-context-optimizer.
4
 
5
  from __future__ import annotations
6
 
7
- from dataclasses import asdict, dataclass, is_dataclass
8
  from pathlib import Path
9
  from typing import Any
10
 
@@ -24,7 +24,13 @@ class StepResult:
24
 
25
 
26
  class RagContextOptimizerEnv:
27
- def __init__(self, task_name: str = "single_domain_qa"):
 
 
 
 
 
 
28
  if task_name not in TASKS_BY_NAME:
29
  raise ValueError(f"Unknown task_name: {task_name}")
30
 
@@ -32,7 +38,12 @@ class RagContextOptimizerEnv:
32
  self._all_chunks = load_corpus(self._corpus_path)
33
  self.retriever = HybridRetriever(self._all_chunks)
34
  self.grader = TaskGrader()
35
- self.task: Task = TASKS_BY_NAME[task_name]
 
 
 
 
 
36
 
37
  self._available_chunks: list[Chunk] = []
38
  self._selected_chunks: list[str] = []
@@ -42,6 +53,22 @@ class RagContextOptimizerEnv:
42
  self._last_action_feedback: str | None = None
43
  self._last_answer = ""
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  async def reset(self) -> StepResult:
46
  self._available_chunks = self._filter_chunks_for_task(self.task)
47
  self._selected_chunks = []
 
4
 
5
  from __future__ import annotations
6
 
7
+ from dataclasses import asdict, dataclass, is_dataclass, replace
8
  from pathlib import Path
9
  from typing import Any
10
 
 
24
 
25
 
26
  class RagContextOptimizerEnv:
27
+ def __init__(
28
+ self,
29
+ task_name: str = "single_domain_qa",
30
+ query_override: str | None = None,
31
+ token_budget_override: int | None = None,
32
+ max_steps_override: int | None = None,
33
+ ):
34
  if task_name not in TASKS_BY_NAME:
35
  raise ValueError(f"Unknown task_name: {task_name}")
36
 
 
38
  self._all_chunks = load_corpus(self._corpus_path)
39
  self.retriever = HybridRetriever(self._all_chunks)
40
  self.grader = TaskGrader()
41
+ self.task: Task = self._build_task(
42
+ TASKS_BY_NAME[task_name],
43
+ query_override=query_override,
44
+ token_budget_override=token_budget_override,
45
+ max_steps_override=max_steps_override,
46
+ )
47
 
48
  self._available_chunks: list[Chunk] = []
49
  self._selected_chunks: list[str] = []
 
53
  self._last_action_feedback: str | None = None
54
  self._last_answer = ""
55
 
56
+ @staticmethod
57
+ def _build_task(
58
+ base_task: Task,
59
+ query_override: str | None = None,
60
+ token_budget_override: int | None = None,
61
+ max_steps_override: int | None = None,
62
+ ) -> Task:
63
+ updated_task = base_task
64
+ if query_override and query_override.strip():
65
+ updated_task = replace(updated_task, query=query_override.strip())
66
+ if token_budget_override is not None and token_budget_override > 0:
67
+ updated_task = replace(updated_task, token_budget=token_budget_override)
68
+ if max_steps_override is not None and max_steps_override > 0:
69
+ updated_task = replace(updated_task, max_steps=max_steps_override)
70
+ return updated_task
71
+
72
  async def reset(self) -> StepResult:
73
  self._available_chunks = self._filter_chunks_for_task(self.task)
74
  self._selected_chunks = []
streamlit_app.py CHANGED
@@ -6,9 +6,9 @@ import streamlit as st
6
 
7
  API_URL = st.secrets.get("API_URL", "http://localhost:7860") if hasattr(st, "secrets") else "http://localhost:7860"
8
 
9
- st.set_page_config(page_title="rag-context-optimizer", page_icon="📚", layout="wide")
10
  st.title("RAG Context Optimizer")
11
- st.caption("Streamlit control panel for the benchmark API.")
12
 
13
 
14
  def api_get(path: str):
@@ -17,52 +17,142 @@ def api_get(path: str):
17
  return response.json()
18
 
19
 
20
- def api_post(path: str, payload: dict):
21
- response = requests.post(f"{API_URL}{path}", json=payload, timeout=20)
22
  response.raise_for_status()
23
  return response.json()
24
 
25
 
26
- tasks = api_get("/tasks")
27
- task_names = [task["name"] for task in tasks]
28
- selected_task = st.sidebar.selectbox("Task", task_names)
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- if st.sidebar.button("Reset Task", use_container_width=True):
31
- st.session_state["reset_payload"] = api_post("/reset", {"task_name": selected_task})
32
 
33
- if "reset_payload" not in st.session_state:
34
- st.session_state["reset_payload"] = api_post("/reset", {"task_name": selected_task})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- payload = st.session_state["reset_payload"]
37
  observation = payload["observation"]
38
 
39
- col1, col2, col3 = st.columns(3)
40
- col1.metric("Budget", observation["token_budget"])
41
- col2.metric("Used", observation["total_tokens_used"])
42
- col3.metric("Step", observation["step_number"])
 
43
 
44
- st.subheader("Query")
45
  st.info(observation["query"])
46
 
47
- st.subheader("Chunks")
48
- for chunk in observation["available_chunks"]:
49
- with st.expander(f"{chunk['chunk_id']} · {chunk['domain']} · {chunk['tokens']} tokens"):
50
- st.write(", ".join(chunk["keywords"]))
51
- c1, c2 = st.columns(2)
52
- if c1.button(f"Select {chunk['chunk_id']}", key=f"select-{chunk['chunk_id']}"):
53
- st.session_state["reset_payload"] = api_post("/step", {"action_type": "select_chunk", "chunk_id": chunk["chunk_id"]})
54
- st.rerun()
55
- if c2.button(f"Compress {chunk['chunk_id']}", key=f"compress-{chunk['chunk_id']}"):
56
- st.session_state["reset_payload"] = api_post("/step", {"action_type": "compress_chunk", "chunk_id": chunk["chunk_id"], "compression_ratio": 0.5})
57
- st.rerun()
58
-
59
- answer = st.text_area("Answer", "A concise answer synthesized from the selected evidence.")
60
- if st.button("Submit Answer", type="primary"):
61
- st.session_state["reset_payload"] = api_post("/step", {"action_type": "submit_answer", "answer": answer})
 
 
 
62
  st.rerun()
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  st.subheader("Observation")
65
- st.json(st.session_state["reset_payload"])
66
 
67
  st.subheader("State")
68
  st.json(api_get("/state"))
 
6
 
7
  API_URL = st.secrets.get("API_URL", "http://localhost:7860") if hasattr(st, "secrets") else "http://localhost:7860"
8
 
9
+ st.set_page_config(page_title="rag-context-optimizer", page_icon="R", layout="wide")
10
  st.title("RAG Context Optimizer")
11
+ st.caption("Use any prompt, keep the token budget tight, and let the optimizer pick the best evidence per token.")
12
 
13
 
14
  def api_get(path: str):
 
17
  return response.json()
18
 
19
 
20
def api_post(path: str, payload: dict | None = None):
    """POST ``payload`` as JSON to ``API_URL`` + ``path`` and return the decoded JSON reply.

    An empty or missing payload is sent as ``{}``. Error status codes raise
    through ``raise_for_status``; the request times out after 20 seconds.
    """
    body = payload if payload else {}
    response = requests.post(f"{API_URL}{path}", json=body, timeout=20)
    response.raise_for_status()
    return response.json()
24
 
25
 
26
def start_episode(task_name: str, query: str, token_budget: int, max_steps: int):
    """Call ``/reset`` with the given preset and overrides and keep the reply.

    The reply is stored under ``st.session_state["payload"]``.
    """
    reset_request = {
        "task_name": task_name,
        "custom_query": query,
        "token_budget": token_budget,
        "max_steps": max_steps,
    }
    st.session_state["payload"] = api_post("/reset", reset_request)
36
+
37
+
38
def do_step(payload: dict):
    """Send one action to ``/step`` and store the reply in ``st.session_state["payload"]``."""
    step_result = api_post("/step", payload)
    st.session_state["payload"] = step_result
40
 
 
 
41
 
42
# ---------------------------------------------------------------------------
# Sidebar: preset, custom prompt and limits
# ---------------------------------------------------------------------------
tasks = api_get("/tasks")
task_map = {task["name"]: task for task in tasks}

selected_task = st.sidebar.selectbox("Task preset", list(task_map))
task_meta = task_map[selected_task]

default_query = st.session_state.get("custom_query", "")
custom_query = st.sidebar.text_area(
    "Custom prompt",
    value=default_query,
    height=180,
    placeholder="Enter any prompt you want to optimize for minimal token usage.",
)
# Defaults come from the preset, clamped so they can never fall below the
# widget minimum.
token_budget = st.sidebar.number_input(
    "Token budget",
    min_value=50,
    value=max(50, int(task_meta["token_budget"])),
    step=10,
)
max_steps = st.sidebar.number_input(
    "Max steps",
    min_value=1,
    value=max(1, int(task_meta["max_steps"])),
    step=1,
)

# Remember the prompt so it is offered again as the default on the next run.
st.session_state["custom_query"] = custom_query

sidebar_cols = st.sidebar.columns(2)
if sidebar_cols[0].button("Start / Reset", use_container_width=True):
    if not custom_query.strip():
        st.sidebar.error("Enter a custom prompt first.")
    else:
        start_episode(selected_task, custom_query.strip(), int(token_budget), int(max_steps))
        st.rerun()

if sidebar_cols[1].button("Refresh", use_container_width=True):
    st.rerun()

# Nothing below makes sense until an episode has been started.
if "payload" not in st.session_state:
    st.info("Add your prompt in the sidebar and press Start / Reset.")
    st.stop()

# ---------------------------------------------------------------------------
# Episode summary
# ---------------------------------------------------------------------------
payload = st.session_state["payload"]
observation = payload["observation"]

col1, col2, col3, col4 = st.columns(4)
col1.metric("Task", observation["task_name"])
col2.metric("Budget", observation["token_budget"])
col3.metric("Used", observation["total_tokens_used"])
col4.metric("Step", observation["step_number"])

st.subheader("Active Query")
st.info(observation["query"])

feedback = observation.get("last_action_feedback")
if feedback:
    st.warning(feedback)
# `info` and `reward` may be present but null, so fall back explicitly rather
# than relying on dict.get defaults.
grader_breakdown = (payload.get("info") or {}).get("grader_breakdown")
if grader_breakdown:
    st.success(f"Final score: {float(payload.get('reward') or 0):.4f}")
    st.json(grader_breakdown)

# ---------------------------------------------------------------------------
# Actions
# ---------------------------------------------------------------------------
action_cols = st.columns(3)
if action_cols[0].button("Auto Optimize Step", use_container_width=True):
    suggestion = api_post("/optimize-step")
    do_step(suggestion)
    st.rerun()
if action_cols[1].button("Auto Run", use_container_width=True):
    # Capped at 20 iterations; never steps an episode that is already done.
    for _ in range(20):
        if st.session_state["payload"].get("done"):
            break
        suggestion = api_post("/optimize-step")
        do_step(suggestion)
        if suggestion["action_type"] == "submit_answer":
            break
    st.rerun()

manual_answer = action_cols[2].text_input("Manual answer", value="")
if st.button("Submit Manual Answer", type="primary", use_container_width=True):
    do_step(
        {
            "action_type": "submit_answer",
            "answer": manual_answer.strip() or "Concise answer synthesized from the selected evidence.",
        }
    )
    st.rerun()

# ---------------------------------------------------------------------------
# Chunk cards (two columns)
# ---------------------------------------------------------------------------
st.subheader("Available Chunks")
chunk_columns = st.columns(2)
# Built once instead of once per chunk.
selected_ids = set(observation.get("selected_chunks") or [])
for index, chunk in enumerate(observation["available_chunks"]):
    chunk_id = chunk["chunk_id"]
    container = chunk_columns[index % 2].container(border=True)
    container.markdown(f"**{chunk_id}**")
    container.caption(f"{chunk['domain']} | {chunk['tokens']} tokens")
    container.write(", ".join(chunk["keywords"]))
    c1, c2 = container.columns(2)
    if chunk_id in selected_ids:
        if c1.button("Deselect", key=f"deselect-{chunk_id}", use_container_width=True):
            do_step({"action_type": "deselect_chunk", "chunk_id": chunk_id})
            st.rerun()
    else:
        if c1.button("Select", key=f"select-{chunk_id}", use_container_width=True):
            do_step({"action_type": "select_chunk", "chunk_id": chunk_id})
            st.rerun()
    if c2.button("Compress 50%", key=f"compress-{chunk_id}", use_container_width=True):
        do_step(
            {
                "action_type": "compress_chunk",
                "chunk_id": chunk_id,
                "compression_ratio": 0.5,
            }
        )
        st.rerun()

# ---------------------------------------------------------------------------
# Raw payloads
# ---------------------------------------------------------------------------
st.subheader("Observation")
st.json(payload)

st.subheader("State")
st.json(api_get("/state"))