chuckfinca Claude Opus 4.6 (1M context) committed on
Commit
04f8810
·
1 Parent(s): 04d3f5a

Simplify explorer to pure API backend

Browse files

Remove custom HTML/JS frontend — it lives on the website at
/explore. Keep only Gradio API endpoints: ask, upload, doc,
traces, replay.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +30 -268
app.py CHANGED
@@ -1,7 +1,7 @@
1
- """Document Explorer — upload documents, ask questions with cited answers.
2
 
3
- The LLM explores documents by writing Python code. No training, no vector DB.
4
- Built on a-simple-llm-harness.
5
  """
6
 
7
  from __future__ import annotations
@@ -73,9 +73,9 @@ def upload_trace(result: dict) -> None:
73
  print(f"WARNING: trace upload failed: {exc}")
74
 
75
 
76
- def save_uploaded_files(files: list[str]) -> Path:
77
  workspace = Path(tempfile.mkdtemp(prefix="lh-workspace-"))
78
- for file_path in files:
79
  src = Path(file_path)
80
  (workspace / src.name).write_bytes(src.read_bytes())
81
  return workspace
@@ -128,7 +128,6 @@ def stream_question(
128
  scratch_path: str,
129
  session_cost: float,
130
  token: str,
131
- files: list[str] | None = None,
132
  ) -> Generator[str, None, None]:
133
  """Streaming API — yields JSON event strings."""
134
  if ACCESS_TOKEN and token != ACCESS_TOKEN:
@@ -147,9 +146,6 @@ def stream_question(
147
  return
148
 
149
  workspace = Path(workspace_path) if workspace_path else None
150
- if files and not workspace:
151
- workspace = save_uploaded_files(files)
152
-
153
  if not workspace:
154
  yield json.dumps({"type": "error", "error": "No documents uploaded."})
155
  return
@@ -217,262 +213,24 @@ def stream_question(
217
 
218
 
219
  # ---------------------------------------------------------------------------
220
- # Gradio app
221
  # ---------------------------------------------------------------------------
222
 
223
- _STATIC_DIR = Path(__file__).parent / "static"
224
- _CHAT_UI_CSS = (_STATIC_DIR / "chat-ui.css").read_text() if (_STATIC_DIR / "chat-ui.css").exists() else ""
225
- _CHAT_UI_JS = (_STATIC_DIR / "chat-ui.js").read_text() if (_STATIC_DIR / "chat-ui.js").exists() else ""
226
-
227
- CUSTOM_HTML = """
228
- <div id="explorer-app">
229
- <div id="setup-panel">
230
- <div class="setup-field">
231
- <label for="token-input">Access Token</label>
232
- <input type="password" id="token-input" placeholder="Enter your access token" />
233
- </div>
234
- <div class="setup-field">
235
- <label>Upload Documents</label>
236
- <div id="drop-zone" class="drop-zone">
237
- <p>Drag and drop files here, or <label for="file-input" class="file-label">browse</label></p>
238
- <input type="file" id="file-input" multiple accept=".txt,.csv,.md,.json,.pdf" style="display:none" />
239
- <div id="file-list"></div>
240
- </div>
241
- </div>
242
- <button id="start-btn" disabled>Start exploring</button>
243
- </div>
244
-
245
- <div id="chat-panel" style="display:none">
246
- <div class="chat-container">
247
- <div class="chat-history" id="chat-history"></div>
248
- <div class="chat-input-wrapper">
249
- <input type="text" id="chat-input" class="chat-input" placeholder="Ask a question about your documents..." autocomplete="off" />
250
- </div>
251
- <div class="chat-stats" id="session-cost"></div>
252
- </div>
253
- </div>
254
- </div>
255
- """
256
-
257
- CUSTOM_CSS = _CHAT_UI_CSS + """
258
- #explorer-app { font-family: var(--font-family, 'Inter', system-ui, sans-serif); max-width: 800px; margin: 0 auto; }
259
- #setup-panel { padding: 24px 0; }
260
- .setup-field { margin-bottom: 16px; }
261
- .setup-field label { display: block; font-size: 13px; color: #6B7280; margin-bottom: 4px; }
262
- #token-input { width: 100%; padding: 8px 12px; border: 1px solid #E5E7EB; border-radius: 4px; font-family: inherit; font-size: 14px; }
263
- #token-input:focus { outline: none; border-color: #4682B4; }
264
- .drop-zone { border: 2px dashed #E5E7EB; border-radius: 8px; padding: 32px; text-align: center; color: #9CA3AF; cursor: pointer; transition: border-color 0.2s; }
265
- .drop-zone.drag-over { border-color: #4682B4; background: #f0f7ff; }
266
- .drop-zone p { margin: 0; }
267
- .file-label { color: #4682B4; cursor: pointer; text-decoration: underline; }
268
- #file-list { margin-top: 8px; font-size: 13px; color: #374151; text-align: left; }
269
- #file-list div { padding: 2px 0; }
270
- #start-btn { width: 100%; padding: 10px; background: #4682B4; color: white; border: none; border-radius: 4px; font-family: inherit; font-size: 14px; cursor: pointer; margin-top: 8px; }
271
- #start-btn:disabled { background: #D1D5DB; cursor: default; }
272
- #start-btn:not(:disabled):hover { background: #3a6f9a; }
273
- #chat-panel { padding-top: 8px; }
274
- """
275
-
276
- CUSTOM_JS = _CHAT_UI_JS + """
277
- ;document.addEventListener('DOMContentLoaded', function() {
278
- var API_BASE = window.location.origin;
279
- var tokenInput = document.getElementById('token-input');
280
- var fileInput = document.getElementById('file-input');
281
- var dropZone = document.getElementById('drop-zone');
282
- var fileList = document.getElementById('file-list');
283
- var startBtn = document.getElementById('start-btn');
284
- var setupPanel = document.getElementById('setup-panel');
285
- var chatPanel = document.getElementById('chat-panel');
286
- var chatInput = document.getElementById('chat-input');
287
- var chatHistory = document.getElementById('chat-history');
288
- var sessionCostEl = document.getElementById('session-cost');
289
-
290
- var selectedFiles = [];
291
- var workspacePath = '';
292
- var scratchPath = '';
293
- var sessionCost = 0;
294
-
295
- function updateStartBtn() {
296
- startBtn.disabled = !(selectedFiles.length > 0);
297
- }
298
-
299
- function showFiles() {
300
- fileList.innerHTML = selectedFiles.map(function(f) { return '<div>' + escapeHtml(f.name) + '</div>'; }).join('');
301
- updateStartBtn();
302
- }
303
-
304
- fileInput.addEventListener('change', function() {
305
- selectedFiles = Array.from(fileInput.files);
306
- showFiles();
307
- });
308
-
309
- dropZone.addEventListener('click', function() { fileInput.click(); });
310
- dropZone.addEventListener('dragover', function(e) { e.preventDefault(); dropZone.classList.add('drag-over'); });
311
- dropZone.addEventListener('dragleave', function() { dropZone.classList.remove('drag-over'); });
312
- dropZone.addEventListener('drop', function(e) {
313
- e.preventDefault();
314
- dropZone.classList.remove('drag-over');
315
- selectedFiles = Array.from(e.dataTransfer.files);
316
- showFiles();
317
- });
318
-
319
- startBtn.addEventListener('click', function() {
320
- startBtn.disabled = true;
321
- startBtn.textContent = 'Uploading...';
322
-
323
- var formData = new FormData();
324
- selectedFiles.forEach(function(f) { formData.append('files', f); });
325
-
326
- fetch(API_BASE + '/gradio_api/upload', {
327
- method: 'POST',
328
- body: formData
329
- })
330
- .then(function(r) { return r.json(); })
331
- .then(function(uploadedPaths) {
332
- return fetch(API_BASE + '/gradio_api/call/upload', {
333
- method: 'POST',
334
- headers: { 'Content-Type': 'application/json' },
335
- body: JSON.stringify({ data: [tokenInput.value, uploadedPaths] })
336
- });
337
- })
338
- .then(function(r) { return r.json(); })
339
- .then(function(result) {
340
- return fetch(API_BASE + '/gradio_api/call/upload/' + result.event_id);
341
- })
342
- .then(function(r) { return r.text(); })
343
- .then(function(text) {
344
- var lines = text.split('\\n');
345
- var dataLine = lines.find(function(l) { return l.startsWith('data:'); });
346
- var parsed = JSON.parse(dataLine.substring(5).trim());
347
- var data = JSON.parse(Array.isArray(parsed) ? parsed[0] : parsed);
348
- if (data.error) {
349
- startBtn.disabled = false;
350
- startBtn.textContent = 'Start exploring';
351
- alert(data.error);
352
- return;
353
- }
354
- workspacePath = data.workspace_path;
355
- setupPanel.style.display = 'none';
356
- chatPanel.style.display = 'block';
357
- chatInput.focus();
358
- })
359
- .catch(function(err) {
360
- console.error(err);
361
- startBtn.disabled = false;
362
- startBtn.textContent = 'Start exploring';
363
- });
364
- });
365
-
366
- chatInput.addEventListener('keydown', function(e) {
367
- if (e.key !== 'Enter' || !chatInput.value.trim() || chatInput.disabled) return;
368
- e.preventDefault();
369
- var question = chatInput.value.trim();
370
- chatInput.value = '';
371
- chatInput.disabled = true;
372
- chatInput.placeholder = '';
373
-
374
- var turn = document.createElement('div');
375
- turn.className = 'chat-turn';
376
- turn.innerHTML = '<div class="chat-question">' + escapeHtml(question) + '</div>';
377
- chatHistory.appendChild(turn);
378
-
379
- var answerEl = document.createElement('div');
380
- answerEl.className = 'chat-answer';
381
- turn.appendChild(answerEl);
382
- var accumulated = '';
383
- var toolCount = 0;
384
-
385
- fetch(API_BASE + '/gradio_api/call/ask', {
386
- method: 'POST',
387
- headers: { 'Content-Type': 'application/json' },
388
- body: JSON.stringify({ data: [question, workspacePath, scratchPath, sessionCost, tokenInput.value] })
389
- })
390
- .then(function(r) { return r.json(); })
391
- .then(function(result) {
392
- var eventSource = new EventSource(API_BASE + '/gradio_api/call/ask/' + result.event_id);
393
-
394
- function handleEvent(e) {
395
- var raw = JSON.parse(e.data);
396
- var eventData = JSON.parse(Array.isArray(raw) ? raw[0] : raw);
397
-
398
- if (eventData.type === 'delta') {
399
- accumulated += eventData.content;
400
- answerEl.innerHTML = markdownToHtml(accumulated);
401
- answerEl.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
402
- } else if (eventData.type === 'tool_call') {
403
- toolCount = eventData.count;
404
- answerEl.innerHTML = '<em class="chat-tool-status">Exploring documents (' + toolCount + ' tool calls)...</em>';
405
- accumulated = '';
406
- } else if (eventData.type === 'error') {
407
- eventSource.close();
408
- answerEl.innerHTML = '<span class="chat-error">' + escapeHtml(eventData.error) + '</span>';
409
- chatInput.disabled = false;
410
- chatInput.placeholder = 'Ask a question about your documents...';
411
- } else if (eventData.type === 'done') {
412
- eventSource.close();
413
- var finalHtml = '<div class="chat-answer">' + markdownToHtml(eventData.answer || accumulated) + '</div>';
414
- finalHtml += renderSources(eventData.sources);
415
- if (eventData.stats) {
416
- finalHtml += '<div class="chat-stats">' + eventData.stats + '</div>';
417
- }
418
- if (eventData.trace_html) {
419
- finalHtml += '<button class="chat-trace-toggle" onclick="this.nextElementSibling.classList.toggle(\\'open\\')">trace</button>';
420
- finalHtml += '<div class="chat-trace">' + eventData.trace_html + '</div>';
421
- }
422
- turn.innerHTML = '<div class="chat-question">' + escapeHtml(question) + '</div>' + finalHtml;
423
-
424
- if (eventData.workspace_path) workspacePath = eventData.workspace_path;
425
- if (eventData.scratch_path) scratchPath = eventData.scratch_path;
426
- if (eventData.session_cost != null) {
427
- sessionCost = eventData.session_cost;
428
- sessionCostEl.textContent = 'Session cost: $' + sessionCost.toFixed(4);
429
- }
430
-
431
- chatInput.disabled = false;
432
- chatInput.placeholder = 'Ask a question about your documents...';
433
- chatInput.focus({ preventScroll: true });
434
- }
435
- }
436
-
437
- eventSource.addEventListener('generating', handleEvent);
438
- eventSource.addEventListener('complete', handleEvent);
439
-
440
- eventSource.onerror = function() {
441
- eventSource.close();
442
- if (!accumulated) {
443
- answerEl.innerHTML = '<span class="chat-error">Connection error.</span>';
444
- }
445
- chatInput.disabled = false;
446
- chatInput.placeholder = 'Ask a question about your documents...';
447
- };
448
- })
449
- .catch(function() {
450
- turn.innerHTML += '<div class="chat-error">Connection error.</div>';
451
- chatInput.disabled = false;
452
- chatInput.placeholder = 'Ask a question about your documents...';
453
- });
454
- });
455
- });
456
- """
457
-
458
 
459
  def build_app() -> gr.Blocks:
460
- custom_head = f"<script>{CUSTOM_JS}</script>"
461
- with gr.Blocks(title="Document Explorer", css=CUSTOM_CSS, head=custom_head) as demo:
462
- gr.HTML(CUSTOM_HTML)
463
-
464
- # Hidden state for file upload workspace
465
- upload_workspace = gr.State("")
466
 
467
  # Streaming ask endpoint
468
- api_ask_input = [
469
  gr.Textbox(visible=False), # question
470
  gr.Textbox(visible=False), # workspace_path
471
  gr.Textbox(visible=False), # scratch_path
472
  gr.Number(visible=False), # session_cost
473
  gr.Textbox(visible=False), # token
474
  ]
475
- api_ask_output = gr.Textbox(visible=False)
476
 
477
  def api_ask_stream(question, workspace_path, scratch_path, session_cost, token):
478
  for event_json in stream_question(
@@ -480,17 +238,12 @@ def build_app() -> gr.Blocks:
480
  ):
481
  yield event_json
482
 
483
- api_ask_btn = gr.Button(visible=False)
484
- api_ask_btn.click(
485
- api_ask_stream,
486
- inputs=api_ask_input,
487
- outputs=api_ask_output,
488
- api_name="ask",
489
- )
490
 
491
- # Upload endpoint — accepts file paths from Gradio's /upload, creates workspace
492
- upload_token_input = gr.Textbox(visible=False)
493
- upload_files_input = gr.File(visible=False, file_count="multiple")
494
  upload_output = gr.Textbox(visible=False)
495
 
496
  def api_upload(token, files):
@@ -498,22 +251,31 @@ def build_app() -> gr.Blocks:
498
  return json.dumps({"error": "Invalid access token."})
499
  if not files:
500
  return json.dumps({"error": "No files provided."})
501
- file_paths = [f.name if hasattr(f, 'name') else str(f) for f in files]
502
  workspace = save_uploaded_files(file_paths)
503
  return json.dumps({"workspace_path": str(workspace), "file_count": len(file_paths)})
504
 
505
  upload_btn = gr.Button(visible=False)
506
- upload_btn.click(api_upload, inputs=[upload_token_input, upload_files_input], outputs=upload_output, api_name="upload")
507
 
508
  # Document viewer endpoint
509
  doc_input = gr.Textbox(visible=False)
 
510
  doc_output = gr.Textbox(visible=False)
511
 
512
- def api_get_doc(filename):
513
- return json.dumps({"error": "not available"})
 
 
 
 
 
 
 
 
514
 
515
  doc_btn = gr.Button(visible=False)
516
- doc_btn.click(api_get_doc, inputs=doc_input, outputs=doc_output, api_name="doc")
517
 
518
  # Trace list endpoint
519
  traces_input = gr.Textbox(visible=False)
 
1
+ """Document Explorer — API backend for exploring uploaded documents with an LLM.
2
 
3
+ The frontend lives on the AppSimple website. This Space provides
4
+ streaming question/answer, file upload, and trace endpoints.
5
  """
6
 
7
  from __future__ import annotations
 
73
  print(f"WARNING: trace upload failed: {exc}")
74
 
75
 
76
+ def save_uploaded_files(file_paths: list[str]) -> Path:
77
  workspace = Path(tempfile.mkdtemp(prefix="lh-workspace-"))
78
+ for file_path in file_paths:
79
  src = Path(file_path)
80
  (workspace / src.name).write_bytes(src.read_bytes())
81
  return workspace
 
128
  scratch_path: str,
129
  session_cost: float,
130
  token: str,
 
131
  ) -> Generator[str, None, None]:
132
  """Streaming API — yields JSON event strings."""
133
  if ACCESS_TOKEN and token != ACCESS_TOKEN:
 
146
  return
147
 
148
  workspace = Path(workspace_path) if workspace_path else None
 
 
 
149
  if not workspace:
150
  yield json.dumps({"type": "error", "error": "No documents uploaded."})
151
  return
 
213
 
214
 
215
  # ---------------------------------------------------------------------------
216
+ # Gradio app (API endpoints only)
217
  # ---------------------------------------------------------------------------
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  def build_app() -> gr.Blocks:
221
+ with gr.Blocks(title="Document Explorer") as demo:
222
+ gr.Markdown("# Document Explorer API\n\nThis Space provides the API backend. "
223
+ "Visit [appsimple.io/explore](https://appsimple.io/explore) for the full interface.")
 
 
 
224
 
225
  # Streaming ask endpoint
226
+ ask_inputs = [
227
  gr.Textbox(visible=False), # question
228
  gr.Textbox(visible=False), # workspace_path
229
  gr.Textbox(visible=False), # scratch_path
230
  gr.Number(visible=False), # session_cost
231
  gr.Textbox(visible=False), # token
232
  ]
233
+ ask_output = gr.Textbox(visible=False)
234
 
235
  def api_ask_stream(question, workspace_path, scratch_path, session_cost, token):
236
  for event_json in stream_question(
 
238
  ):
239
  yield event_json
240
 
241
+ ask_btn = gr.Button(visible=False)
242
+ ask_btn.click(api_ask_stream, inputs=ask_inputs, outputs=ask_output, api_name="ask")
 
 
 
 
 
243
 
244
+ # Upload endpoint — accepts files, creates workspace
245
+ upload_token = gr.Textbox(visible=False)
246
+ upload_files = gr.File(visible=False, file_count="multiple")
247
  upload_output = gr.Textbox(visible=False)
248
 
249
  def api_upload(token, files):
 
251
  return json.dumps({"error": "Invalid access token."})
252
  if not files:
253
  return json.dumps({"error": "No files provided."})
254
+ file_paths = [f.name if hasattr(f, "name") else str(f) for f in files]
255
  workspace = save_uploaded_files(file_paths)
256
  return json.dumps({"workspace_path": str(workspace), "file_count": len(file_paths)})
257
 
258
  upload_btn = gr.Button(visible=False)
259
+ upload_btn.click(api_upload, inputs=[upload_token, upload_files], outputs=upload_output, api_name="upload")
260
 
261
  # Document viewer endpoint
262
  doc_input = gr.Textbox(visible=False)
263
+ doc_ws_input = gr.Textbox(visible=False)
264
  doc_output = gr.Textbox(visible=False)
265
 
266
+ def api_get_doc(filename, workspace_path):
267
+ if not workspace_path or not filename:
268
+ return json.dumps({"error": "not found"})
269
+ safe_name = Path(filename).name
270
+ if not safe_name.endswith(".md"):
271
+ safe_name += ".md"
272
+ filepath = Path(workspace_path) / safe_name
273
+ if not filepath.is_file():
274
+ return json.dumps({"error": "not found"})
275
+ return json.dumps({"filename": safe_name, "content": filepath.read_text()})
276
 
277
  doc_btn = gr.Button(visible=False)
278
+ doc_btn.click(api_get_doc, inputs=[doc_input, doc_ws_input], outputs=doc_output, api_name="doc")
279
 
280
  # Trace list endpoint
281
  traces_input = gr.Textbox(visible=False)