nothingworry committed on
Commit
7501e7b
·
1 Parent(s): d1e5882

feat(gradio): Add AI metadata display, latency prediction, and context-aware routing visualization

Browse files
Files changed (1) hide show
  1. app.py +157 -16
app.py CHANGED
@@ -202,7 +202,7 @@ def get_reasoning_trace(tenant_id: str, role: str, message: str):
202
  tool_traces = response_data.get("tool_traces", [])
203
  decision = response_data.get("decision", {})
204
 
205
- # Format reasoning trace
206
  trace_md = "## 🧠 Reasoning Path\n\n"
207
  for idx, step in enumerate(reasoning_trace, 1):
208
  step_name = step.get("step", "unknown")
@@ -214,35 +214,108 @@ def get_reasoning_trace(tenant_id: str, role: str, message: str):
214
  trace_md += f"- **Rule Matches:** {step['match_count']}\n"
215
  if step.get("hit_count"):
216
  trace_md += f"- **RAG Hits:** {step['hit_count']}\n"
 
 
217
  if step.get("latency_ms"):
218
- trace_md += f"- **Latency:** {step['latency_ms']}ms\n"
219
  if step.get("decision"):
220
  dec = step['decision']
221
  trace_md += f"- **Tool:** {dec.get('tool', 'N/A')}\n"
222
  trace_md += f"- **Action:** {dec.get('action', 'N/A')}\n"
 
 
 
 
 
223
  trace_md += "\n"
224
 
225
- # Format tool traces
226
  if tool_traces:
227
  trace_md += "## βš™οΈ Tool Invocations\n\n"
228
  for idx, tool in enumerate(tool_traces, 1):
229
  tool_name = tool.get("tool", tool.get("tool_name", "unknown"))
230
- latency = tool.get("latency_ms", tool.get("latency", 0))
 
231
  status = tool.get("status", "success")
232
- trace_md += f"### {idx}. {tool_name}\n"
 
233
  trace_md += f"- **Status:** {status}\n"
234
  trace_md += f"- **Latency:** {latency}ms\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  if tool.get("result_count"):
236
- trace_md += f"- **Results:** {tool['result_count']}\n"
237
  trace_md += "\n"
238
 
239
- # Format decision
240
  if decision:
241
  trace_md += "## 🎯 Final Decision\n\n"
242
  trace_md += f"- **Tool:** {decision.get('tool', 'N/A')}\n"
243
  trace_md += f"- **Action:** {decision.get('action', 'N/A')}\n"
244
  if decision.get('reason'):
245
- trace_md += f"- **Reason:** {decision['reason']}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  return trace_md
248
  else:
@@ -313,7 +386,33 @@ def ingest_document(
313
  )
314
  if response.status_code == 200:
315
  data = response.json()
316
- return f"βœ… Document ingested successfully.\n\n{data.get('message', '')}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  return f"❌ Ingestion failed ({response.status_code}): {response.text}"
318
  except requests.exceptions.ConnectionError:
319
  return "❌ Could not reach the backend. Make sure the FastAPI server is running."
@@ -354,7 +453,33 @@ def ingest_file(tenant_id: str, role: str, file_obj):
354
  )
355
  if response.status_code == 200:
356
  data = response.json()
357
- return f"βœ… File ingested successfully.\n\n{data.get('message', '')}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  return f"❌ File ingestion failed ({response.status_code}): {response.text}"
359
  except FileNotFoundError:
360
  return "❌ Could not read the uploaded file."
@@ -1151,7 +1276,7 @@ def delete_all_documents(tenant_id: str, role: str):
1151
 
1152
 
1153
  def search_knowledge_base(tenant_id: str, role: str, query: str):
1154
- """Search the knowledge base using RAG semantic search."""
1155
  if not tenant_id or not tenant_id.strip():
1156
  return "❗ Tenant ID is required.", []
1157
 
@@ -1188,7 +1313,7 @@ def search_knowledge_base(tenant_id: str, role: str, query: str):
1188
  "Relevance": f"{relevance:.3f}" if relevance else "N/A"
1189
  })
1190
 
1191
- status = f"βœ… Found {len(results)} result(s) for '{query}'"
1192
  return status, formatted_results
1193
  else:
1194
  error_msg = f"❌ Error {response.status_code}: {response.text}"
@@ -1521,9 +1646,12 @@ with gr.Blocks(
1521
  ### ⚑ Features
1522
  - ✨ Real-time streaming responses
1523
  - 🧠 Multi-step planning & reasoning
1524
- - πŸ” Automatic tool selection
 
1525
  - πŸ’Ύ Conversation memory
1526
  - πŸ“Š Reasoning visualization (see Debug tab)
 
 
1527
  </div>
1528
  """
1529
  )
@@ -1584,8 +1712,11 @@ with gr.Blocks(
1584
 
1585
  **Features:**
1586
  - 🧠 Step-by-step reasoning trace
1587
- - βš™οΈ Tool invocation timeline
1588
- - 🎯 Final decision breakdown
 
 
 
1589
  - πŸ“Š Performance metrics
1590
  </div>
1591
  """
@@ -1630,6 +1761,15 @@ with gr.Blocks(
1630
  - **Files:** PDF, DOCX, TXT, Markdown
1631
  - **Metadata:** Optional JSON metadata for better organization
1632
 
 
 
 
 
 
 
 
 
 
1633
  **⚠️ Note:** Editor role and above can ingest. Admin/Owner can delete.
1634
  </div>
1635
  """
@@ -1787,7 +1927,8 @@ with gr.Blocks(
1787
 
1788
  **Features:**
1789
  - **πŸ“Š Statistics:** View document counts, types, and distribution
1790
- - **πŸ” Search:** Use semantic search to find relevant documents
 
1791
  - **πŸ”½ Filter:** Filter documents by type (text, PDF, FAQ, link)
1792
  - **πŸ—‘οΈ Delete:** Remove individual documents or delete all at once (Admin/Owner only)
1793
  </div>
 
202
  tool_traces = response_data.get("tool_traces", [])
203
  decision = response_data.get("decision", {})
204
 
205
+ # Format reasoning trace with latency predictions and context hints
206
  trace_md = "## 🧠 Reasoning Path\n\n"
207
  for idx, step in enumerate(reasoning_trace, 1):
208
  step_name = step.get("step", "unknown")
 
214
  trace_md += f"- **Rule Matches:** {step['match_count']}\n"
215
  if step.get("hit_count"):
216
  trace_md += f"- **RAG Hits:** {step['hit_count']}\n"
217
+ if step.get("top_score"):
218
+ trace_md += f"- **Top RAG Score:** {step['top_score']:.3f}\n"
219
  if step.get("latency_ms"):
220
+ trace_md += f"- **Actual Latency:** {step['latency_ms']}ms\n"
221
  if step.get("decision"):
222
  dec = step['decision']
223
  trace_md += f"- **Tool:** {dec.get('tool', 'N/A')}\n"
224
  trace_md += f"- **Action:** {dec.get('action', 'N/A')}\n"
225
+ # Show latency prediction if available
226
+ if dec.get('tool_input') and isinstance(dec['tool_input'], dict):
227
+ est_latency = dec['tool_input'].get('_estimated_latency_ms')
228
+ if est_latency:
229
+ trace_md += f"- **⚑ Estimated Latency:** {est_latency}ms\n"
230
  trace_md += "\n"
231
 
232
+ # Format tool traces with schema information
233
  if tool_traces:
234
  trace_md += "## βš™οΈ Tool Invocations\n\n"
235
  for idx, tool in enumerate(tool_traces, 1):
236
  tool_name = tool.get("tool", tool.get("tool_name", "unknown"))
237
+ response = tool.get("response", {})
238
+ latency = tool.get("latency_ms", response.get("latency_ms", 0))
239
  status = tool.get("status", "success")
240
+
241
+ trace_md += f"### {idx}. {tool_name.upper()}\n"
242
  trace_md += f"- **Status:** {status}\n"
243
  trace_md += f"- **Latency:** {latency}ms\n"
244
+
245
+ # Show latency prediction vs actual
246
+ if isinstance(response, dict) and response.get("latency_ms"):
247
+ actual = response["latency_ms"]
248
+ trace_md += f"- **⚑ Actual vs Estimated:** {actual}ms\n"
249
+
250
+ # Show schema-validated output structure
251
+ if isinstance(response, dict):
252
+ if tool_name == "rag" and "results" in response:
253
+ trace_md += f"- **πŸ“Š Schema:** Valid RAG output\n"
254
+ trace_md += f"- **Results:** {len(response.get('results', []))} chunks\n"
255
+ trace_md += f"- **Top Score:** {response.get('top_score', 0):.3f}\n"
256
+ elif tool_name == "web" and "results" in response:
257
+ trace_md += f"- **πŸ“Š Schema:** Valid Web output\n"
258
+ trace_md += f"- **Results:** {len(response.get('results', []))} items\n"
259
+ elif tool_name == "admin" and "violations" in response:
260
+ trace_md += f"- **πŸ“Š Schema:** Valid Admin output\n"
261
+ trace_md += f"- **Violations:** {len(response.get('violations', []))}\n"
262
+ elif tool_name == "llm" and "text" in response:
263
+ trace_md += f"- **πŸ“Š Schema:** Valid LLM output\n"
264
+ trace_md += f"- **Tokens:** {response.get('tokens_used', 0)}\n"
265
+
266
  if tool.get("result_count"):
267
+ trace_md += f"- **Result Count:** {tool['result_count']}\n"
268
  trace_md += "\n"
269
 
270
+ # Format decision with context-aware routing and latency info
271
  if decision:
272
  trace_md += "## 🎯 Final Decision\n\n"
273
  trace_md += f"- **Tool:** {decision.get('tool', 'N/A')}\n"
274
  trace_md += f"- **Action:** {decision.get('action', 'N/A')}\n"
275
  if decision.get('reason'):
276
+ reason = decision['reason']
277
+ trace_md += f"- **Reason:** {reason}\n"
278
+
279
+ # Extract and highlight context-aware routing hints
280
+ if "context:" in reason.lower():
281
+ trace_md += "\n### 🧠 Context-Aware Routing:\n"
282
+ if "skip web" in reason.lower() or "rag high" in reason.lower():
283
+ trace_md += "- ⚑ **RAG high score β†’ Web search skipped**\n"
284
+ if "skip rag" in reason.lower() or "memory" in reason.lower():
285
+ trace_md += "- πŸ’Ύ **Relevant memory available β†’ RAG skipped**\n"
286
+ if "skip reasoning" in reason.lower() or "critical" in reason.lower():
287
+ trace_md += "- 🚨 **Critical violation β†’ Agent reasoning skipped**\n"
288
+
289
+ # Extract latency estimates
290
+ if "latency:" in reason.lower() or "est." in reason.lower():
291
+ import re
292
+ latency_match = re.search(r'latency[:\s]+(\d+)ms', reason, re.IGNORECASE)
293
+ if latency_match:
294
+ est_latency = latency_match.group(1)
295
+ trace_md += f"\n### ⚑ Latency Prediction:\n"
296
+ trace_md += f"- **Estimated Total Latency:** {est_latency}ms\n"
297
+
298
+ # Show tool sequence with latency estimates
299
+ if decision.get('tool_input') and isinstance(decision['tool_input'], dict):
300
+ steps = decision['tool_input'].get('steps', [])
301
+ if steps:
302
+ trace_md += "\n### πŸ“‹ Tool Execution Plan:\n"
303
+ total_est_latency = 0
304
+ for step_idx, step in enumerate(steps, 1):
305
+ if isinstance(step, dict):
306
+ if "parallel" in step:
307
+ trace_md += f"{step_idx}. **Parallel Execution:** RAG + Web\n"
308
+ total_est_latency += max(90, 800) # Max of RAG and Web
309
+ elif step.get("tool"):
310
+ tool = step["tool"]
311
+ est_lat = step.get("input", {}).get("_estimated_latency_ms", 0)
312
+ if est_lat:
313
+ total_est_latency += est_lat
314
+ trace_md += f"{step_idx}. **{tool.upper()}** (est. {est_lat}ms)\n"
315
+ else:
316
+ trace_md += f"{step_idx}. **{tool.upper()}**\n"
317
+ if total_est_latency > 0:
318
+ trace_md += f"\n- **Total Estimated Latency:** {total_est_latency}ms\n"
319
 
320
  return trace_md
321
  else:
 
386
  )
387
  if response.status_code == 200:
388
  data = response.json()
389
+ message = f"βœ… Document ingested successfully.\n\n{data.get('message', '')}"
390
+
391
+ # Display extracted metadata if available
392
+ extracted_metadata = data.get('extracted_metadata', {})
393
+ if extracted_metadata:
394
+ message += "\n\n### πŸ€– AI-Generated Metadata:\n"
395
+ if extracted_metadata.get('title'):
396
+ message += f"- **Title:** {extracted_metadata['title']}\n"
397
+ if extracted_metadata.get('summary'):
398
+ message += f"- **Summary:** {extracted_metadata['summary'][:200]}...\n"
399
+ if extracted_metadata.get('tags'):
400
+ tags = ', '.join(extracted_metadata['tags'][:5])
401
+ message += f"- **Tags:** {tags}\n"
402
+ if extracted_metadata.get('topics'):
403
+ topics = ', '.join(extracted_metadata['topics'][:3])
404
+ message += f"- **Topics:** {topics}\n"
405
+ if extracted_metadata.get('quality_score'):
406
+ quality = extracted_metadata['quality_score']
407
+ quality_bar = "β–ˆ" * int(quality * 10) + "β–‘" * (10 - int(quality * 10))
408
+ message += f"- **Quality Score:** {quality:.2f} {quality_bar}\n"
409
+ if extracted_metadata.get('detected_date'):
410
+ message += f"- **Detected Date:** {extracted_metadata['detected_date']}\n"
411
+ if extracted_metadata.get('extraction_method'):
412
+ method = extracted_metadata['extraction_method'].upper()
413
+ message += f"- **Extraction Method:** {method}\n"
414
+
415
+ return message
416
  return f"❌ Ingestion failed ({response.status_code}): {response.text}"
417
  except requests.exceptions.ConnectionError:
418
  return "❌ Could not reach the backend. Make sure the FastAPI server is running."
 
453
  )
454
  if response.status_code == 200:
455
  data = response.json()
456
+ message = f"βœ… File ingested successfully.\n\n{data.get('message', '')}"
457
+
458
+ # Display extracted metadata if available
459
+ extracted_metadata = data.get('extracted_metadata', {})
460
+ if extracted_metadata:
461
+ message += "\n\n### πŸ€– AI-Generated Metadata:\n"
462
+ if extracted_metadata.get('title'):
463
+ message += f"- **Title:** {extracted_metadata['title']}\n"
464
+ if extracted_metadata.get('summary'):
465
+ message += f"- **Summary:** {extracted_metadata['summary'][:200]}...\n"
466
+ if extracted_metadata.get('tags'):
467
+ tags = ', '.join(extracted_metadata['tags'][:5])
468
+ message += f"- **Tags:** {tags}\n"
469
+ if extracted_metadata.get('topics'):
470
+ topics = ', '.join(extracted_metadata['topics'][:3])
471
+ message += f"- **Topics:** {topics}\n"
472
+ if extracted_metadata.get('quality_score'):
473
+ quality = extracted_metadata['quality_score']
474
+ quality_bar = "β–ˆ" * int(quality * 10) + "β–‘" * (10 - int(quality * 10))
475
+ message += f"- **Quality Score:** {quality:.2f} {quality_bar}\n"
476
+ if extracted_metadata.get('detected_date'):
477
+ message += f"- **Detected Date:** {extracted_metadata['detected_date']}\n"
478
+ if extracted_metadata.get('extraction_method'):
479
+ method = extracted_metadata['extraction_method'].upper()
480
+ message += f"- **Extraction Method:** {method}\n"
481
+
482
+ return message
483
  return f"❌ File ingestion failed ({response.status_code}): {response.text}"
484
  except FileNotFoundError:
485
  return "❌ Could not read the uploaded file."
 
1276
 
1277
 
1278
  def search_knowledge_base(tenant_id: str, role: str, query: str):
1279
+ """Search the knowledge base using RAG semantic search with cross-encoder re-ranking."""
1280
  if not tenant_id or not tenant_id.strip():
1281
  return "❗ Tenant ID is required.", []
1282
 
 
1313
  "Relevance": f"{relevance:.3f}" if relevance else "N/A"
1314
  })
1315
 
1316
+ status = f"βœ… Found {len(results)} result(s) for '{query}' (re-ranked with cross-encoder)"
1317
  return status, formatted_results
1318
  else:
1319
  error_msg = f"❌ Error {response.status_code}: {response.text}"
 
1646
  ### ⚑ Features
1647
  - ✨ Real-time streaming responses
1648
  - 🧠 Multi-step planning & reasoning
1649
+ - πŸ” Automatic tool selection with latency prediction
1650
+ - 🧠 Context-aware routing (intelligent tool skipping)
1651
  - πŸ’Ύ Conversation memory
1652
  - πŸ“Š Reasoning visualization (see Debug tab)
1653
+ - ⚑ Per-tool latency estimates (RAG: 60-120ms, Web: 400-1800ms)
1654
+ - πŸ“‹ Schema-validated tool outputs
1655
  </div>
1656
  """
1657
  )
 
1712
 
1713
  **Features:**
1714
  - 🧠 Step-by-step reasoning trace
1715
+ - βš™οΈ Tool invocation timeline with schema-validated outputs
1716
+ - ⚑ Per-tool latency predictions (RAG: 60-120ms, Web: 400-1800ms, Admin: <20ms)
1717
+ - 🧠 Context-aware routing hints (skip web if RAG high, skip RAG if memory available)
1718
+ - πŸ“Š Tool output schemas for easier debugging
1719
+ - 🎯 Final decision breakdown with estimated latency
1720
  - πŸ“Š Performance metrics
1721
  </div>
1722
  """
 
1761
  - **Files:** PDF, DOCX, TXT, Markdown
1762
  - **Metadata:** Optional JSON metadata for better organization
1763
 
1764
+ **πŸ€– AI-Generated Metadata (Automatic):**
1765
+ - ✨ **Title extraction** from filename, content, or URL
1766
+ - πŸ“ **Summary generation** (2-3 sentences via LLM)
1767
+ - 🏷️ **Tags extraction** (5-8 relevant tags)
1768
+ - πŸ“š **Topics identification** (3-5 main themes)
1769
+ - πŸ“… **Date detection** (multiple formats)
1770
+ - ⭐ **Quality score** (0.0-1.0 based on structure and completeness)
1771
+ - πŸ”„ **Intelligent fallback** when LLM is unavailable
1772
+
1773
  **⚠️ Note:** Editor role and above can ingest. Admin/Owner can delete.
1774
  </div>
1775
  """
 
1927
 
1928
  **Features:**
1929
  - **πŸ“Š Statistics:** View document counts, types, and distribution
1930
+ - **πŸ” Search:** Use semantic search with cross-encoder re-ranking for better results
1931
+ - **πŸ€– AI Metadata:** Documents include auto-extracted title, summary, tags, topics, and quality scores
1932
  - **πŸ”½ Filter:** Filter documents by type (text, PDF, FAQ, link)
1933
  - **πŸ—‘οΈ Delete:** Remove individual documents or delete all at once (Admin/Owner only)
1934
  </div>