nothingworry committed on
Commit
7501e7b
·
1 Parent(s): d1e5882

feat(gradio): Add AI metadata display, latency prediction, and context-aware routing visualization

Browse files
Files changed (1) hide show
  1. app.py +157 -16
app.py CHANGED
@@ -202,7 +202,7 @@ def get_reasoning_trace(tenant_id: str, role: str, message: str):
202
  tool_traces = response_data.get("tool_traces", [])
203
  decision = response_data.get("decision", {})
204
 
205
- # Format reasoning trace
206
  trace_md = "## 🧠 Reasoning Path\n\n"
207
  for idx, step in enumerate(reasoning_trace, 1):
208
  step_name = step.get("step", "unknown")
@@ -214,35 +214,108 @@ def get_reasoning_trace(tenant_id: str, role: str, message: str):
214
  trace_md += f"- **Rule Matches:** {step['match_count']}\n"
215
  if step.get("hit_count"):
216
  trace_md += f"- **RAG Hits:** {step['hit_count']}\n"
 
 
217
  if step.get("latency_ms"):
218
- trace_md += f"- **Latency:** {step['latency_ms']}ms\n"
219
  if step.get("decision"):
220
  dec = step['decision']
221
  trace_md += f"- **Tool:** {dec.get('tool', 'N/A')}\n"
222
  trace_md += f"- **Action:** {dec.get('action', 'N/A')}\n"
 
 
 
 
 
223
  trace_md += "\n"
224
 
225
- # Format tool traces
226
  if tool_traces:
227
  trace_md += "## βš™οΈ Tool Invocations\n\n"
228
  for idx, tool in enumerate(tool_traces, 1):
229
  tool_name = tool.get("tool", tool.get("tool_name", "unknown"))
230
- latency = tool.get("latency_ms", tool.get("latency", 0))
 
231
  status = tool.get("status", "success")
232
- trace_md += f"### {idx}. {tool_name}\n"
 
233
  trace_md += f"- **Status:** {status}\n"
234
  trace_md += f"- **Latency:** {latency}ms\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  if tool.get("result_count"):
236
- trace_md += f"- **Results:** {tool['result_count']}\n"
237
  trace_md += "\n"
238
 
239
- # Format decision
240
  if decision:
241
  trace_md += "## 🎯 Final Decision\n\n"
242
  trace_md += f"- **Tool:** {decision.get('tool', 'N/A')}\n"
243
  trace_md += f"- **Action:** {decision.get('action', 'N/A')}\n"
244
  if decision.get('reason'):
245
- trace_md += f"- **Reason:** {decision['reason']}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  return trace_md
248
  else:
@@ -313,7 +386,33 @@ def ingest_document(
313
  )
314
  if response.status_code == 200:
315
  data = response.json()
316
- return f"βœ… Document ingested successfully.\n\n{data.get('message', '')}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  return f"❌ Ingestion failed ({response.status_code}): {response.text}"
318
  except requests.exceptions.ConnectionError:
319
  return "❌ Could not reach the backend. Make sure the FastAPI server is running."
@@ -354,7 +453,33 @@ def ingest_file(tenant_id: str, role: str, file_obj):
354
  )
355
  if response.status_code == 200:
356
  data = response.json()
357
- return f"βœ… File ingested successfully.\n\n{data.get('message', '')}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  return f"❌ File ingestion failed ({response.status_code}): {response.text}"
359
  except FileNotFoundError:
360
  return "❌ Could not read the uploaded file."
@@ -1151,7 +1276,7 @@ def delete_all_documents(tenant_id: str, role: str):
1151
 
1152
 
1153
  def search_knowledge_base(tenant_id: str, role: str, query: str):
1154
- """Search the knowledge base using RAG semantic search."""
1155
  if not tenant_id or not tenant_id.strip():
1156
  return "❗ Tenant ID is required.", []
1157
 
@@ -1188,7 +1313,7 @@ def search_knowledge_base(tenant_id: str, role: str, query: str):
1188
  "Relevance": f"{relevance:.3f}" if relevance else "N/A"
1189
  })
1190
 
1191
- status = f"βœ… Found {len(results)} result(s) for '{query}'"
1192
  return status, formatted_results
1193
  else:
1194
  error_msg = f"❌ Error {response.status_code}: {response.text}"
@@ -1521,9 +1646,12 @@ with gr.Blocks(
1521
  ### ⚑ Features
1522
  - ✨ Real-time streaming responses
1523
  - 🧠 Multi-step planning & reasoning
1524
- - πŸ” Automatic tool selection
 
1525
  - πŸ’Ύ Conversation memory
1526
  - πŸ“Š Reasoning visualization (see Debug tab)
 
 
1527
  </div>
1528
  """
1529
  )
@@ -1584,8 +1712,11 @@ with gr.Blocks(
1584
 
1585
  **Features:**
1586
  - 🧠 Step-by-step reasoning trace
1587
- - βš™οΈ Tool invocation timeline
1588
- - 🎯 Final decision breakdown
 
 
 
1589
  - πŸ“Š Performance metrics
1590
  </div>
1591
  """
@@ -1630,6 +1761,15 @@ with gr.Blocks(
1630
  - **Files:** PDF, DOCX, TXT, Markdown
1631
  - **Metadata:** Optional JSON metadata for better organization
1632
 
 
 
 
 
 
 
 
 
 
1633
  **⚠️ Note:** Editor role and above can ingest. Admin/Owner can delete.
1634
  </div>
1635
  """
@@ -1787,7 +1927,8 @@ with gr.Blocks(
1787
 
1788
  **Features:**
1789
  - **πŸ“Š Statistics:** View document counts, types, and distribution
1790
- - **πŸ” Search:** Use semantic search to find relevant documents
 
1791
  - **πŸ”½ Filter:** Filter documents by type (text, PDF, FAQ, link)
1792
  - **πŸ—‘οΈ Delete:** Remove individual documents or delete all at once (Admin/Owner only)
1793
  </div>
 
202
  tool_traces = response_data.get("tool_traces", [])
203
  decision = response_data.get("decision", {})
204
 
205
+ # Format reasoning trace with latency predictions and context hints
206
  trace_md = "## 🧠 Reasoning Path\n\n"
207
  for idx, step in enumerate(reasoning_trace, 1):
208
  step_name = step.get("step", "unknown")
 
214
  trace_md += f"- **Rule Matches:** {step['match_count']}\n"
215
  if step.get("hit_count"):
216
  trace_md += f"- **RAG Hits:** {step['hit_count']}\n"
217
+ if step.get("top_score"):
218
+ trace_md += f"- **Top RAG Score:** {step['top_score']:.3f}\n"
219
  if step.get("latency_ms"):
220
+ trace_md += f"- **Actual Latency:** {step['latency_ms']}ms\n"
221
  if step.get("decision"):
222
  dec = step['decision']
223
  trace_md += f"- **Tool:** {dec.get('tool', 'N/A')}\n"
224
  trace_md += f"- **Action:** {dec.get('action', 'N/A')}\n"
225
+ # Show latency prediction if available
226
+ if dec.get('tool_input') and isinstance(dec['tool_input'], dict):
227
+ est_latency = dec['tool_input'].get('_estimated_latency_ms')
228
+ if est_latency:
229
+ trace_md += f"- **⚑ Estimated Latency:** {est_latency}ms\n"
230
  trace_md += "\n"
231
 
232
+ # Format tool traces with schema information
233
  if tool_traces:
234
  trace_md += "## βš™οΈ Tool Invocations\n\n"
235
  for idx, tool in enumerate(tool_traces, 1):
236
  tool_name = tool.get("tool", tool.get("tool_name", "unknown"))
237
+ response = tool.get("response", {})
238
+ latency = tool.get("latency_ms", response.get("latency_ms", 0))
239
  status = tool.get("status", "success")
240
+
241
+ trace_md += f"### {idx}. {tool_name.upper()}\n"
242
  trace_md += f"- **Status:** {status}\n"
243
  trace_md += f"- **Latency:** {latency}ms\n"
244
+
245
+ # Show latency prediction vs actual
246
+ if isinstance(response, dict) and response.get("latency_ms"):
247
+ actual = response["latency_ms"]
248
+ trace_md += f"- **⚑ Actual vs Estimated:** {actual}ms\n"
249
+
250
+ # Show schema-validated output structure
251
+ if isinstance(response, dict):
252
+ if tool_name == "rag" and "results" in response:
253
+ trace_md += f"- **πŸ“Š Schema:** Valid RAG output\n"
254
+ trace_md += f"- **Results:** {len(response.get('results', []))} chunks\n"
255
+ trace_md += f"- **Top Score:** {response.get('top_score', 0):.3f}\n"
256
+ elif tool_name == "web" and "results" in response:
257
+ trace_md += f"- **πŸ“Š Schema:** Valid Web output\n"
258
+ trace_md += f"- **Results:** {len(response.get('results', []))} items\n"
259
+ elif tool_name == "admin" and "violations" in response:
260
+ trace_md += f"- **πŸ“Š Schema:** Valid Admin output\n"
261
+ trace_md += f"- **Violations:** {len(response.get('violations', []))}\n"
262
+ elif tool_name == "llm" and "text" in response:
263
+ trace_md += f"- **πŸ“Š Schema:** Valid LLM output\n"
264
+ trace_md += f"- **Tokens:** {response.get('tokens_used', 0)}\n"
265
+
266
  if tool.get("result_count"):
267
+ trace_md += f"- **Result Count:** {tool['result_count']}\n"
268
  trace_md += "\n"
269
 
270
+ # Format decision with context-aware routing and latency info
271
  if decision:
272
  trace_md += "## 🎯 Final Decision\n\n"
273
  trace_md += f"- **Tool:** {decision.get('tool', 'N/A')}\n"
274
  trace_md += f"- **Action:** {decision.get('action', 'N/A')}\n"
275
  if decision.get('reason'):
276
+ reason = decision['reason']
277
+ trace_md += f"- **Reason:** {reason}\n"
278
+
279
+ # Extract and highlight context-aware routing hints
280
+ if "context:" in reason.lower():
281
+ trace_md += "\n### 🧠 Context-Aware Routing:\n"
282
+ if "skip web" in reason.lower() or "rag high" in reason.lower():
283
+ trace_md += "- ⚑ **RAG high score β†’ Web search skipped**\n"
284
+ if "skip rag" in reason.lower() or "memory" in reason.lower():
285
+ trace_md += "- πŸ’Ύ **Relevant memory available β†’ RAG skipped**\n"
286
+ if "skip reasoning" in reason.lower() or "critical" in reason.lower():
287
+ trace_md += "- 🚨 **Critical violation β†’ Agent reasoning skipped**\n"
288
+
289
+ # Extract latency estimates
290
+ if "latency:" in reason.lower() or "est." in reason.lower():
291
+ import re
292
+ latency_match = re.search(r'latency[:\s]+(\d+)ms', reason, re.IGNORECASE)
293
+ if latency_match:
294
+ est_latency = latency_match.group(1)
295
+ trace_md += f"\n### ⚑ Latency Prediction:\n"
296
+ trace_md += f"- **Estimated Total Latency:** {est_latency}ms\n"
297
+
298
+ # Show tool sequence with latency estimates
299
+ if decision.get('tool_input') and isinstance(decision['tool_input'], dict):
300
+ steps = decision['tool_input'].get('steps', [])
301
+ if steps:
302
+ trace_md += "\n### πŸ“‹ Tool Execution Plan:\n"
303
+ total_est_latency = 0
304
+ for step_idx, step in enumerate(steps, 1):
305
+ if isinstance(step, dict):
306
+ if "parallel" in step:
307
+ trace_md += f"{step_idx}. **Parallel Execution:** RAG + Web\n"
308
+ total_est_latency += max(90, 800) # Max of RAG and Web
309
+ elif step.get("tool"):
310
+ tool = step["tool"]
311
+ est_lat = step.get("input", {}).get("_estimated_latency_ms", 0)
312
+ if est_lat:
313
+ total_est_latency += est_lat
314
+ trace_md += f"{step_idx}. **{tool.upper()}** (est. {est_lat}ms)\n"
315
+ else:
316
+ trace_md += f"{step_idx}. **{tool.upper()}**\n"
317
+ if total_est_latency > 0:
318
+ trace_md += f"\n- **Total Estimated Latency:** {total_est_latency}ms\n"
319
 
320
  return trace_md
321
  else:
 
386
  )
387
  if response.status_code == 200:
388
  data = response.json()
389
+ message = f"βœ… Document ingested successfully.\n\n{data.get('message', '')}"
390
+
391
+ # Display extracted metadata if available
392
+ extracted_metadata = data.get('extracted_metadata', {})
393
+ if extracted_metadata:
394
+ message += "\n\n### πŸ€– AI-Generated Metadata:\n"
395
+ if extracted_metadata.get('title'):
396
+ message += f"- **Title:** {extracted_metadata['title']}\n"
397
+ if extracted_metadata.get('summary'):
398
+ message += f"- **Summary:** {extracted_metadata['summary'][:200]}...\n"
399
+ if extracted_metadata.get('tags'):
400
+ tags = ', '.join(extracted_metadata['tags'][:5])
401
+ message += f"- **Tags:** {tags}\n"
402
+ if extracted_metadata.get('topics'):
403
+ topics = ', '.join(extracted_metadata['topics'][:3])
404
+ message += f"- **Topics:** {topics}\n"
405
+ if extracted_metadata.get('quality_score'):
406
+ quality = extracted_metadata['quality_score']
407
+ quality_bar = "β–ˆ" * int(quality * 10) + "β–‘" * (10 - int(quality * 10))
408
+ message += f"- **Quality Score:** {quality:.2f} {quality_bar}\n"
409
+ if extracted_metadata.get('detected_date'):
410
+ message += f"- **Detected Date:** {extracted_metadata['detected_date']}\n"
411
+ if extracted_metadata.get('extraction_method'):
412
+ method = extracted_metadata['extraction_method'].upper()
413
+ message += f"- **Extraction Method:** {method}\n"
414
+
415
+ return message
416
  return f"❌ Ingestion failed ({response.status_code}): {response.text}"
417
  except requests.exceptions.ConnectionError:
418
  return "❌ Could not reach the backend. Make sure the FastAPI server is running."
 
453
  )
454
  if response.status_code == 200:
455
  data = response.json()
456
+ message = f"βœ… File ingested successfully.\n\n{data.get('message', '')}"
457
+
458
+ # Display extracted metadata if available
459
+ extracted_metadata = data.get('extracted_metadata', {})
460
+ if extracted_metadata:
461
+ message += "\n\n### πŸ€– AI-Generated Metadata:\n"
462
+ if extracted_metadata.get('title'):
463
+ message += f"- **Title:** {extracted_metadata['title']}\n"
464
+ if extracted_metadata.get('summary'):
465
+ message += f"- **Summary:** {extracted_metadata['summary'][:200]}...\n"
466
+ if extracted_metadata.get('tags'):
467
+ tags = ', '.join(extracted_metadata['tags'][:5])
468
+ message += f"- **Tags:** {tags}\n"
469
+ if extracted_metadata.get('topics'):
470
+ topics = ', '.join(extracted_metadata['topics'][:3])
471
+ message += f"- **Topics:** {topics}\n"
472
+ if extracted_metadata.get('quality_score'):
473
+ quality = extracted_metadata['quality_score']
474
+ quality_bar = "β–ˆ" * int(quality * 10) + "β–‘" * (10 - int(quality * 10))
475
+ message += f"- **Quality Score:** {quality:.2f} {quality_bar}\n"
476
+ if extracted_metadata.get('detected_date'):
477
+ message += f"- **Detected Date:** {extracted_metadata['detected_date']}\n"
478
+ if extracted_metadata.get('extraction_method'):
479
+ method = extracted_metadata['extraction_method'].upper()
480
+ message += f"- **Extraction Method:** {method}\n"
481
+
482
+ return message
483
  return f"❌ File ingestion failed ({response.status_code}): {response.text}"
484
  except FileNotFoundError:
485
  return "❌ Could not read the uploaded file."
 
1276
 
1277
 
1278
  def search_knowledge_base(tenant_id: str, role: str, query: str):
1279
+ """Search the knowledge base using RAG semantic search with cross-encoder re-ranking."""
1280
  if not tenant_id or not tenant_id.strip():
1281
  return "❗ Tenant ID is required.", []
1282
 
 
1313
  "Relevance": f"{relevance:.3f}" if relevance else "N/A"
1314
  })
1315
 
1316
+ status = f"βœ… Found {len(results)} result(s) for '{query}' (re-ranked with cross-encoder)"
1317
  return status, formatted_results
1318
  else:
1319
  error_msg = f"❌ Error {response.status_code}: {response.text}"
 
1646
  ### ⚑ Features
1647
  - ✨ Real-time streaming responses
1648
  - 🧠 Multi-step planning & reasoning
1649
+ - πŸ” Automatic tool selection with latency prediction
1650
+ - 🧠 Context-aware routing (intelligent tool skipping)
1651
  - πŸ’Ύ Conversation memory
1652
  - πŸ“Š Reasoning visualization (see Debug tab)
1653
+ - ⚑ Per-tool latency estimates (RAG: 60-120ms, Web: 400-1800ms)
1654
+ - πŸ“‹ Schema-validated tool outputs
1655
  </div>
1656
  """
1657
  )
 
1712
 
1713
  **Features:**
1714
  - 🧠 Step-by-step reasoning trace
1715
+ - βš™οΈ Tool invocation timeline with schema-validated outputs
1716
+ - ⚑ Per-tool latency predictions (RAG: 60-120ms, Web: 400-1800ms, Admin: <20ms)
1717
+ - 🧠 Context-aware routing hints (skip web if RAG high, skip RAG if memory available)
1718
+ - πŸ“Š Tool output schemas for easier debugging
1719
+ - 🎯 Final decision breakdown with estimated latency
1720
  - πŸ“Š Performance metrics
1721
  </div>
1722
  """
 
1761
  - **Files:** PDF, DOCX, TXT, Markdown
1762
  - **Metadata:** Optional JSON metadata for better organization
1763
 
1764
+ **πŸ€– AI-Generated Metadata (Automatic):**
1765
+ - ✨ **Title extraction** from filename, content, or URL
1766
+ - πŸ“ **Summary generation** (2-3 sentences via LLM)
1767
+ - 🏷️ **Tags extraction** (5-8 relevant tags)
1768
+ - πŸ“š **Topics identification** (3-5 main themes)
1769
+ - πŸ“… **Date detection** (multiple formats)
1770
+ - ⭐ **Quality score** (0.0-1.0 based on structure and completeness)
1771
+ - πŸ”„ **Intelligent fallback** when LLM is unavailable
1772
+
1773
  **⚠️ Note:** Editor role and above can ingest. Admin/Owner can delete.
1774
  </div>
1775
  """
 
1927
 
1928
  **Features:**
1929
  - **πŸ“Š Statistics:** View document counts, types, and distribution
1930
+ - **πŸ” Search:** Use semantic search with cross-encoder re-ranking for better results
1931
+ - **πŸ€– AI Metadata:** Documents include auto-extracted title, summary, tags, topics, and quality scores
1932
  - **πŸ”½ Filter:** Filter documents by type (text, PDF, FAQ, link)
1933
  - **πŸ—‘οΈ Delete:** Remove individual documents or delete all at once (Admin/Owner only)
1934
  </div>