import gradio as gr
import requests
import json
import os
from pathlib import Path
from collections import Counter
from datetime import datetime
try:
import plotly.graph_objects as go
PLOTLY_AVAILABLE = True
except ImportError:
PLOTLY_AVAILABLE = False
go = None
# Base URL of the backend API; read from the BACKEND_BASE_URL environment
# variable, falling back to a local server on port 8000.
BACKEND_BASE_URL = os.getenv("BACKEND_BASE_URL", "http://localhost:8000")
# Role-based access control permissions
# Roles recognized by the permission helpers defined in this file.
VALID_ROLES = ["viewer", "editor", "admin", "owner"]
# Role sent in the x-user-role header when the caller supplies no role.
DEFAULT_ROLE = "viewer"
def can_manage_rules(role: str) -> bool:
    """Return True when *role* is allowed to manage rules (admin or owner)."""
    rule_managers = ("admin", "owner")
    return role in rule_managers
def can_ingest_documents(role: str) -> bool:
    """Return True when *role* may ingest documents (editor, admin or owner)."""
    ingest_roles = ("editor", "admin", "owner")
    return role in ingest_roles
def can_delete_documents(role: str) -> bool:
    """Return True when *role* may delete documents (admin or owner)."""
    delete_roles = ("admin", "owner")
    return role in delete_roles
def can_view_analytics(role: str) -> bool:
    """Return True when *role* may view analytics.

    Every role listed in ``VALID_ROLES`` qualifies; anything else does not.
    """
    is_known_role = role in VALID_ROLES
    return is_known_role
def _decode_sse_line(line_bytes):
    """Decode one raw line of the streaming response.

    Returns a ``(kind, payload)`` pair where ``kind`` is ``"data"`` or
    ``"error"`` and ``payload`` is the parsed JSON object. Anything that
    should be skipped (blank, undecodable, malformed or unrecognized lines,
    and JSON payloads that are not objects) yields ``(None, None)``.
    """
    if not line_bytes:
        return None, None
    try:
        line = line_bytes.decode("utf-8").strip()
    except UnicodeDecodeError:
        return None, None
    if line.startswith("data: "):
        kind = "data"
    elif line.startswith("error:"):
        kind = "error"
    else:
        return None, None
    try:
        # Both prefixes ("data: " and "error:") are six characters long.
        payload = json.loads(line[6:])
    except json.JSONDecodeError:
        return None, None
    if not isinstance(payload, dict):
        return None, None
    return kind, payload


def chat_with_agent(message, tenant_id, role, history):
    """
    Send a message to the backend MCP agent and stream back the response.

    Args:
        message: User's message text.
        tenant_id: Tenant ID for multi-tenant isolation.
        role: Role sent in the ``x-user-role`` header; ``DEFAULT_ROLE`` is
            used when it is empty.
        history: Chat history (Gradio messages format). It is updated in
            place; ``None`` is treated as an empty history.

    Yields:
        The chat history after each update (status line, streamed token,
        or error message).
    """
    if history is None:
        history = []

    if not message or not message.strip():
        yield history
        return

    if not tenant_id or not tenant_id.strip():
        error_msg = "Please enter a Tenant ID before sending a message."
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": error_msg})
        yield history
        return

    tenant = tenant_id.strip()
    history.append({"role": "user", "content": message})

    backend_url = f"{BACKEND_BASE_URL}/agent/message/stream"
    payload = {
        "tenant_id": tenant,
        "message": message,
        "user_id": None,
        "conversation_history": [],
        "temperature": 0.0,
    }
    headers = {
        "Content-Type": "application/json",
        "x-tenant-id": tenant,
        "x-user-role": role if role else DEFAULT_ROLE,
    }

    assistant_message = ""
    placeholder_open = False  # True once the assistant bubble has been added
    error_msg = None

    try:
        # The context manager releases the streamed connection even when the
        # consumer stops iterating early or an exception interrupts the loop.
        with requests.post(
            backend_url,
            json=payload,
            headers=headers,
            stream=True,
            timeout=120,
        ) as response:
            if response.status_code != 200:
                error_msg = f"Error {response.status_code}: {response.text}"
            else:
                history.append({"role": "assistant", "content": assistant_message})
                placeholder_open = True
                yield history  # Show the empty bubble immediately

                for line_bytes in response.iter_lines():
                    kind, data = _decode_sse_line(line_bytes)

                    if kind == "error":
                        detail = data.get("error", "Unknown error")
                        history[-1] = {"role": "assistant", "content": f"❌ Error: {detail}"}
                        yield history
                        break

                    if kind != "data":
                        continue

                    if "status" in data:
                        status_msg = data.get("message", "")
                        if status_msg:
                            # Temporary status text; replaced by the first token.
                            history[-1] = {"role": "assistant", "content": f"⏳ {status_msg}"}
                            yield history
                        continue

                    token = data.get("token", "")
                    if token:
                        assistant_message += token
                        history[-1] = {"role": "assistant", "content": assistant_message}
                        yield history

                    if data.get("done", False):
                        break
    except requests.exceptions.ConnectionError:
        error_msg = (
            "❌ Connection Error: Could not connect to backend. "
            f"Please ensure the FastAPI server is running at {BACKEND_BASE_URL}"
        )
    except requests.exceptions.Timeout:
        error_msg = (
            "⏱️ Request Timeout: The backend took longer than 2 minutes to respond. "
            "This may happen if:\n"
            "- The LLM is processing a complex query\n"
            "- Multiple tools (RAG, Web Search) are being used\n"
            "- The backend is under heavy load\n\n"
            "Please try again with a simpler query, or check if the backend services "
            "(Ollama, MCP servers) are running properly."
        )
    except requests.exceptions.RequestException as e:
        error_msg = f"❌ Request Error: {str(e)}"
    except Exception as e:
        error_msg = f"❌ Unexpected Error: {str(e)}"

    if error_msg is not None:
        if placeholder_open and not assistant_message:
            # Nothing was streamed yet: reuse the empty/status bubble instead
            # of leaving it dangling above the error.
            history[-1] = {"role": "assistant", "content": error_msg}
        else:
            history.append({"role": "assistant", "content": error_msg})
        yield history
def get_reasoning_trace(tenant_id: str, role: str, message: str):
    """
    Call the backend debug endpoint for *message* and render the result.

    Returns a markdown string with the reasoning steps, the tool
    invocations and the final decision, or an error string when the tenant
    ID is missing, the backend answers with a non-200 status, or anything
    raises along the way.
    """
    if not (tenant_id and tenant_id.strip()):
        return "❗ Tenant ID is required."

    # (key in the step dict, label shown, unit suffix); falsy values are skipped.
    step_fields = (
        ("intent", "Intent", ""),
        ("match_count", "Rule Matches", ""),
        ("hit_count", "RAG Hits", ""),
        ("latency_ms", "Latency", "ms"),
    )

    try:
        tenant = tenant_id.strip()
        response = requests.post(
            f"{BACKEND_BASE_URL}/agent/debug",
            json={
                "tenant_id": tenant,
                "message": message,
                "conversation_history": [],
                "temperature": 0.0,
            },
            headers={
                "Content-Type": "application/json",
                "x-tenant-id": tenant,
                "x-user-role": role if role else DEFAULT_ROLE,
            },
            timeout=60,
        )
        if response.status_code != 200:
            return f"❌ Error {response.status_code}: {response.text}"

        body = response.json().get("response", {})
        steps = body.get("reasoning_trace", [])
        tool_calls = body.get("tool_traces", [])
        final_decision = body.get("decision", {})

        # Collect fragments and join once at the end.
        parts = ["## 🧠 Reasoning Path\n\n"]
        for number, step in enumerate(steps, 1):
            title = step.get("step", "unknown").replace("_", " ").title()
            parts.append(f"### {number}. {title}\n")
            for key, label, unit in step_fields:
                if step.get(key):
                    parts.append(f"- **{label}:** {step[key]}{unit}\n")
            if step.get("decision"):
                step_decision = step["decision"]
                parts.append(f"- **Tool:** {step_decision.get('tool', 'N/A')}\n")
                parts.append(f"- **Action:** {step_decision.get('action', 'N/A')}\n")
            parts.append("\n")

        if tool_calls:
            parts.append("## ⚙️ Tool Invocations\n\n")
            for number, call in enumerate(tool_calls, 1):
                call_name = call.get("tool", call.get("tool_name", "unknown"))
                call_latency = call.get("latency_ms", call.get("latency", 0))
                call_status = call.get("status", "success")
                parts.append(f"### {number}. {call_name}\n")
                parts.append(f"- **Status:** {call_status}\n")
                parts.append(f"- **Latency:** {call_latency}ms\n")
                if call.get("result_count"):
                    parts.append(f"- **Results:** {call['result_count']}\n")
                parts.append("\n")

        if final_decision:
            parts.append("## 🎯 Final Decision\n\n")
            parts.append(f"- **Tool:** {final_decision.get('tool', 'N/A')}\n")
            parts.append(f"- **Action:** {final_decision.get('action', 'N/A')}\n")
            if final_decision.get("reason"):
                parts.append(f"- **Reason:** {final_decision['reason']}\n")

        return "".join(parts)
    except Exception as e:
        return f"❌ Error fetching reasoning trace: {str(e)}"
def ingest_document(
    tenant_id: str,
    role: str,
    source_type: str,
    content: str,
    document_url: str,
    filename: str,
    doc_id: str,
    metadata_json: str
):
    """
    Post a text or URL document to the backend ingest endpoint.

    The tenant ID and the caller's role are validated first. For the
    ``"url"`` source type with a URL given, the URL itself is sent as the
    content. ``filename``, ``document_url`` and ``doc_id`` are copied into the
    metadata when present, then merged with the optional ``metadata_json``
    object. Returns a status string describing success or the failure.
    """
    if not (tenant_id and tenant_id.strip()):
        return "❗ Tenant ID is required to ingest documents."
    if not can_ingest_documents(role):
        return "❌ Access Denied: You need Editor, Admin, or Owner role to ingest documents."

    tenant = tenant_id.strip()

    if source_type == "url" and document_url:
        body_text = document_url.strip()
    else:
        body_text = content or ""

    # Only non-empty fields make it into the metadata, in this order.
    metadata = {
        key: value.strip()
        for key, value in (
            ("filename", filename),
            ("url", document_url),
            ("doc_id", doc_id),
        )
        if value
    }

    if metadata_json:
        try:
            extra = json.loads(metadata_json)
        except json.JSONDecodeError as exc:
            return f"❗ Invalid metadata JSON: {exc}"
        if not isinstance(extra, dict):
            return "❗ Metadata JSON must represent an object (key/value pairs)."
        metadata.update(extra)

    request_body = {
        "action": "ingest_document",
        "tenant_id": tenant,
        "source_type": source_type,
        "content": body_text,
        "metadata": metadata,
    }

    try:
        request_headers = {
            "Content-Type": "application/json",
            "x-tenant-id": tenant,
            "x-user-role": role if role else DEFAULT_ROLE,
        }
        response = requests.post(
            f"{BACKEND_BASE_URL}/rag/ingest-document",
            json=request_body,
            headers=request_headers,
            timeout=60,
        )
        if response.status_code != 200:
            return f"❌ Ingestion failed ({response.status_code}): {response.text}"
        result = response.json()
        return f"✅ Document ingested successfully.\n\n{result.get('message', '')}"
    except requests.exceptions.ConnectionError:
        return "❌ Could not reach the backend. Make sure the FastAPI server is running."
    except requests.exceptions.Timeout:
        return "⏱️ The ingestion request timed out. Please try again."
    except Exception as exc:
        return f"❌ Unexpected error during ingestion: {exc}"
def ingest_file(tenant_id: str, role: str, file_obj):
    """
    Upload a file to the backend file-ingest endpoint.

    Args:
        tenant_id: Tenant the file belongs to; required.
        role: Caller's role; must pass ``can_ingest_documents``.
        file_obj: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as ``.name``, matching what
            ``extract_rules_from_file`` accepts.

    Returns:
        A status string describing success or the failure.
    """
    if not tenant_id or not tenant_id.strip():
        return "❗ Tenant ID is required to ingest files."
    if file_obj is None:
        return "❗ Please select a file to upload."
    if not can_ingest_documents(role):
        return "❌ Access Denied: You need Editor, Admin, or Owner role to ingest files."

    tenant_id = tenant_id.strip()

    # Read the upload first, separately from the network call, so read
    # failures and request failures are reported independently.
    try:
        if isinstance(file_obj, (str, os.PathLike)):
            file_path = Path(file_obj)
        else:
            file_path = Path(file_obj.name)
        file_bytes = file_path.read_bytes()
    except FileNotFoundError:
        return "❌ Could not read the uploaded file."
    except Exception as exc:
        return f"❌ Unexpected error during file ingestion: {exc}"

    try:
        files = {
            "file": (file_path.name, file_bytes, "application/octet-stream")
        }
        headers = {
            "x-tenant-id": tenant_id,
            "x-user-role": role if role else DEFAULT_ROLE
        }
        response = requests.post(
            f"{BACKEND_BASE_URL}/rag/ingest-file",
            files=files,
            headers=headers,
            timeout=120
        )
        if response.status_code == 200:
            data = response.json()
            return f"✅ File ingested successfully.\n\n{data.get('message', '')}"
        return f"❌ File ingestion failed ({response.status_code}): {response.text}"
    except requests.exceptions.ConnectionError:
        return "❌ Could not reach the backend. Make sure the FastAPI server is running."
    except requests.exceptions.Timeout:
        return "⏱️ File ingestion timed out. Please try again."
    except Exception as exc:
        return f"❌ Unexpected error during file ingestion: {exc}"
def _format_rules_table(rules: list[str]) -> list[list]:
    """Turn a list of rules into ``[position, rule]`` rows, numbered from 1."""
    return [[position, rule] for position, rule in enumerate(rules, start=1)]
def fetch_admin_rules(tenant_id: str, role: str) -> tuple[str, list[list]]:
    """
    Fetch the tenant's admin rules from the backend.

    Returns ``(summary, rows)``: a markdown/status string and the table rows
    built by ``_format_rules_table``. ``rows`` is empty whenever there are no
    rules or the request fails.
    """
    if not (tenant_id and tenant_id.strip()):
        return "❗ Tenant ID is required.", []

    tenant = tenant_id.strip()

    try:
        request_headers = {
            "x-tenant-id": tenant,
            "x-user-role": role if role else DEFAULT_ROLE,
        }
        response = requests.get(
            f"{BACKEND_BASE_URL}/admin/rules",
            headers=request_headers,
            timeout=30,
        )
        if response.status_code != 200:
            return f"❌ Error {response.status_code}: {response.text}", []

        rules = response.json().get("rules", [])
        if rules:
            heading = f"### Current Rules ({len(rules)})"
            return heading, _format_rules_table(rules)
        return "✅ No admin rules have been configured yet.", []
    except requests.exceptions.ConnectionError:
        return "❌ Could not reach backend. Ensure the FastAPI server is running.", []
    except requests.exceptions.Timeout:
        return "⏱️ Request timed out. Please try again.", []
    except Exception as exc:
        return f"❌ Unexpected error: {exc}", []
def extract_rules_from_file(file_path) -> str:
    """
    Extract the text content of an uploaded rules file.

    Supported extensions are ``.txt``, ``.md``, ``.pdf``, ``.doc`` and
    ``.docx``. PDF extraction needs PyPDF2 and DOC/DOCX extraction needs
    python-docx; both are imported lazily.

    Args:
        file_path: ``None``, a filesystem path (``str`` or ``os.PathLike``),
            or an object exposing the path as ``.name``.

    Returns:
        The extracted text, ``""`` when ``file_path`` is ``None``, or a
        message starting with ``❌`` describing why extraction failed.
    """
    if file_path is None:
        return ""

    try:
        if isinstance(file_path, (str, os.PathLike)):
            # Check real paths first: a pathlib.Path also has a ``.name``
            # attribute, but that is only the basename, not the location.
            file_path = Path(file_path)
        else:
            # Upload wrappers expose the temp-file location as ``.name``.
            file_path = Path(getattr(file_path, "name", file_path))

        if not file_path.exists():
            return f"❌ File not found: {file_path}"

        file_ext = file_path.suffix.lower()

        if file_ext in (".txt", ".md"):
            # Undecodable bytes are dropped rather than failing the upload.
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()

        if file_ext == ".pdf":
            try:
                import PyPDF2
                with open(file_path, "rb") as f:
                    pdf_reader = PyPDF2.PdfReader(f)
                    # A page may yield no text; never hand None to join().
                    pages = [page.extract_text() or "" for page in pdf_reader.pages]
                return "\n".join(pages)
            except ImportError:
                return "❌ PDF extraction requires PyPDF2. Install with: pip install PyPDF2"
            except Exception as e:
                return f"❌ Failed to extract text from PDF: {str(e)}"

        if file_ext in (".doc", ".docx"):
            # NOTE(review): python-docx targets the .docx format; legacy .doc
            # files will most likely end up in the failure branch below.
            try:
                from docx import Document
                doc = Document(str(file_path))
                return "\n".join(paragraph.text for paragraph in doc.paragraphs)
            except ImportError:
                return "❌ DOCX extraction requires python-docx. Install with: pip install python-docx"
            except Exception as e:
                return f"❌ Failed to extract text from DOCX: {str(e)}"

        return f"❌ Unsupported file type: {file_ext}. Supported: .txt, .md, .pdf, .doc, .docx"
    except Exception as e:
        return f"❌ Error reading file: {str(e)}"
def add_admin_rules(tenant_id: str, role: str, rules_text: str) -> str:
    """
    Upload the rules contained in *rules_text* (one per line) to the backend.

    Blank lines and lines starting with ``#`` are ignored. A single rule is
    posted to the regular rules endpoint; several rules are posted to the
    bulk endpoint in chunks of five. Returns a human-readable summary of the
    rules added, the enhancement notes returned by the backend and any
    errors encountered.
    """
    if not (tenant_id and tenant_id.strip()):
        return "❗ Tenant ID is required."
    if not (rules_text and rules_text.strip()):
        return "❗ Provide at least one rule to upload."
    if not can_manage_rules(role):
        return "❌ Access Denied: You need Admin or Owner role to manage rules."

    tenant = tenant_id.strip()

    # Keep only real rules: drop empty lines and '#' comments.
    rules = []
    for raw_line in rules_text.splitlines():
        candidate = raw_line.strip()
        if candidate and not candidate.startswith("#"):
            rules.append(candidate)
    if not rules:
        return "❗ No valid rules detected. (Comment lines starting with # are ignored)"

    added, enhanced, errors = [], [], []
    chunk_size = 5  # Rules per bulk request, to stay within the timeout
    total_rules = len(rules)
    request_headers = {
        "x-tenant-id": tenant,
        "x-user-role": role if role else DEFAULT_ROLE,
    }

    if total_rules == 1:
        only_rule = rules[0]
        try:
            resp = requests.post(
                f"{BACKEND_BASE_URL}/admin/rules",
                params={"rule": only_rule, "enhance": "true"},
                headers=request_headers,
                timeout=30,
            )
            if resp.status_code != 200:
                errors.append(f"{only_rule} -> {resp.status_code}: {resp.text}")
            else:
                data = resp.json()
                added.append(data.get("added_rule", only_rule))
                if data.get("enhanced"):
                    edge_cases = data.get("edge_cases", [])
                    improvements = data.get("improvements", [])
                    explanation = data.get("explanation", "")
                    examples = data.get("examples", [])
                    missing_patterns = data.get("missing_patterns", [])
                    if explanation:
                        enhanced.append(f"**💡 Explanation:** {explanation}")
                    if examples:
                        examples_list = "\n".join(f"  • {ex}" for ex in examples[:5])
                        enhanced.append(f"**📋 Examples:**\n{examples_list}")
                    if missing_patterns:
                        patterns_list = "\n".join(f"  • {p}" for p in missing_patterns[:5])
                        enhanced.append(f"**🔍 Suggested Patterns:**\n{patterns_list}")
                    if edge_cases or improvements:
                        enhanced.append(f"**{data.get('added_rule', only_rule)}**:")
                        if improvements:
                            enhanced.append(f"  • Improvements: {', '.join(improvements[:3])}")
                        if edge_cases:
                            enhanced.append(f"  • Edge cases identified: {len(edge_cases)}")
        except Exception as exc:
            errors.append(f"{only_rule} -> {exc}")
    else:
        total_chunks = (total_rules + chunk_size - 1) // chunk_size
        for chunk_num, start in enumerate(range(0, total_rules, chunk_size), 1):
            chunk = rules[start:start + chunk_size]
            label = f"Chunk {chunk_num}/{total_chunks}"
            try:
                resp = requests.post(
                    f"{BACKEND_BASE_URL}/admin/rules/bulk",
                    json={"rules": chunk},
                    headers=request_headers,
                    params={"enhance": "true"},
                    timeout=45,  # Per-chunk timeout
                )
                if resp.status_code != 200:
                    errors.append(f"{label} failed: {resp.status_code}: {resp.text}")
                    continue
                data = resp.json()
                added.extend(data.get("added_rules", []))
                if data.get("enhanced"):
                    for note in data.get("enhancement_summary", []):
                        enhanced.append(f"[{label}] {note}")
                    # Only the first three explanations of a chunk are shown.
                    if data.get("explanations"):
                        for exp in data["explanations"][:3]:
                            if exp.get("explanation"):
                                enhanced.append(f"\n💡 **{exp.get('rule', 'Rule')} Explanation:** {exp['explanation']}")
                            if exp.get("examples"):
                                examples_list = "\n".join(f"  • {ex}" for ex in exp['examples'][:3])
                                enhanced.append(f"📋 **Examples:**\n{examples_list}")
                            if exp.get("missing_patterns"):
                                patterns_list = "\n".join(f"  • {p}" for p in exp['missing_patterns'][:3])
                                enhanced.append(f"🔍 **Suggested Patterns:**\n{patterns_list}")
            except requests.exceptions.Timeout:
                errors.append(f"{label} timed out after 45s")
            except Exception as exc:
                errors.append(f"{label} error: {exc}")

    sections = []
    if added:
        shown = "\n".join(f"- {r}" for r in added[:10])
        sections.append(f"✅ Added {len(added)}/{total_rules} rule(s):\n" + shown)
        if len(added) > 10:
            sections.append(f"... and {len(added) - 10} more")
    if enhanced:
        sections.append("\n🤖 LLM Enhancement Applied:\n" + "\n".join(enhanced[:5]))
        if len(enhanced) > 5:
            sections.append(f"... and {len(enhanced) - 5} more enhancements")
    if errors:
        sections.append("\n⚠️ Errors:\n" + "\n".join(errors))

    if not sections:
        return "No rules were added."
    return "\n\n".join(sections)
def delete_admin_rule(tenant_id: str, role: str, rule: str) -> str:
    """
    Delete one admin rule, identified by its exact text, via the backend.

    Args:
        tenant_id: Tenant whose rule is deleted; required.
        role: Caller's role; must pass ``can_manage_rules``.
        rule: Exact rule text; surrounding whitespace is stripped.

    Returns:
        A status string describing success or the failure.
    """
    # Local import: the rule text goes into the URL path and must be escaped.
    from urllib.parse import quote

    if not tenant_id or not tenant_id.strip():
        return "❗ Tenant ID is required."
    if not rule or not rule.strip():
        return "❗ Provide the exact rule text to delete."
    if not can_manage_rules(role):
        return "❌ Access Denied: You need Admin or Owner role to delete rules."

    tenant_id = tenant_id.strip()
    rule = rule.strip()

    try:
        headers = {
            "x-tenant-id": tenant_id,
            "x-user-role": role if role else DEFAULT_ROLE
        }
        # Percent-encode everything (safe="") so characters such as '?', '#',
        # '%' or '/' inside the rule cannot be read as URL syntax.
        encoded_rule = quote(rule, safe="")
        resp = requests.delete(
            f"{BACKEND_BASE_URL}/admin/rules/{encoded_rule}",
            headers=headers,
            timeout=15
        )
        if resp.status_code == 200:
            return f"🗑️ Deleted rule: {rule}"
        return f"❌ Error {resp.status_code}: {resp.text}"
    except requests.exceptions.ConnectionError:
        return "❌ Could not reach backend. Ensure the FastAPI server is running."
    except requests.exceptions.Timeout:
        return "⏱️ Delete request timed out. Please try again."
    except Exception as exc:
        return f"❌ Unexpected error: {exc}"
def add_rules_from_file(tenant_id: str, role: str, file_path):
    """
    Extract rules from an uploaded file, add them, and refresh the rule list.

    Args:
        tenant_id: Tenant the rules belong to; required.
        role: Caller's role, forwarded to the add and fetch calls.
        file_path: Uploaded file as accepted by ``extract_rules_from_file``.

    Returns:
        ``(status, summary, rows)``: the outcome of the upload followed by
        the summary and table rows of the current rules.
    """
    refresh_hint = "👉 Click **Refresh Rules** to see existing entries."
    if not tenant_id or not tenant_id.strip():
        return "❗ Tenant ID is required.", refresh_hint, []
    if file_path is None:
        return "❗ Please select a file to upload.", refresh_hint, []

    extracted_text = extract_rules_from_file(file_path)

    if extracted_text.startswith("❌"):
        # Extraction failed: report it but still show the current rules.
        # The role must be passed along; fetch_admin_rules requires it.
        summary, rows = fetch_admin_rules(tenant_id, role)
        return extracted_text, summary, rows

    if not extracted_text or not extracted_text.strip():
        summary, rows = fetch_admin_rules(tenant_id, role)
        return "❗ No text could be extracted from the file.", summary, rows

    status = add_admin_rules(tenant_id, role, extracted_text)
    summary, rows = fetch_admin_rules(tenant_id, role)
    return status, summary, rows
def add_rules_and_refresh(tenant_id: str, role: str, rules_text: str):
    """Add the given rules, then return ``(status, summary, rows)`` with the refreshed rule list."""
    outcome = add_admin_rules(tenant_id, role, rules_text)
    refreshed_summary, refreshed_rows = fetch_admin_rules(tenant_id, role)
    return (outcome, refreshed_summary, refreshed_rows)
def delete_rule_and_refresh(tenant_id: str, role: str, rule: str):
    """Delete the given rule, then return ``(status, summary, rows)`` with the refreshed rule list."""
    outcome = delete_admin_rule(tenant_id, role, rule)
    refreshed_summary, refreshed_rows = fetch_admin_rules(tenant_id, role)
    return (outcome, refreshed_summary, refreshed_rows)
def fetch_admin_analytics(tenant_id: str, role: str):
"""Fetch analytics data and return formatted results with visualizations."""
if not tenant_id or not tenant_id.strip():
error_msg = "❗ Tenant ID is required to view analytics."
return error_msg, {}, None, None, None, None
# All roles can view analytics (matching backend permissions)
# No access check needed here
tenant_id = tenant_id.strip()
headers = {
"x-tenant-id": tenant_id,
"x-user-role": role if role else DEFAULT_ROLE
}
overview_data = {}
tool_usage_data = {}
redflags_data = {}
activity_data = {}
error_msg = None
# Fetch Overview
try:
resp = requests.get(
f"{BACKEND_BASE_URL}/analytics/overview",
headers=headers,
timeout=30
)
if resp.status_code == 200:
overview_data = resp.json()
else:
error_msg = f"❌ Error fetching overview: {resp.status_code}"
except Exception as e:
error_msg = f"❌ Error: {str(e)}"
# Fetch Tool Usage
try:
resp = requests.get(
f"{BACKEND_BASE_URL}/analytics/tool-usage",
headers=headers,
timeout=30
)
if resp.status_code == 200:
tool_usage_data = resp.json()
except Exception:
pass
# Fetch Red Flags
try:
resp = requests.get(
f"{BACKEND_BASE_URL}/analytics/redflags",
headers=headers,
timeout=30
)
if resp.status_code == 200:
redflags_data = resp.json()
except Exception:
pass
# Fetch Activity
try:
resp = requests.get(
f"{BACKEND_BASE_URL}/analytics/activity",
headers=headers,
timeout=30
)
if resp.status_code == 200:
activity_data = resp.json()
except Exception:
pass
# Extract data for visualizations
overview = overview_data.get("overview", {})
tool_usage = overview.get("tool_usage", tool_usage_data.get("tool_usage", {}))
rag_quality = overview.get("rag_quality", {})
# Create tool usage bar chart
tool_chart = None
if tool_usage and PLOTLY_AVAILABLE:
try:
tools = []
counts = []
latencies = []
colors_list = []
color_map = {
"rag": "#3b82f6",
"rag.search": "#2563eb",
"rag.ingest": "#1d4ed8",
"rag.list": "#1e40af",
"web.search": "#06b6d4",
"admin": "#a855f7",
"llm": "#10b981"
}
for tool_name, stats in tool_usage.items():
tools.append(tool_name.replace(".", " ").title())
counts.append(stats.get("count", 0))
latencies.append(stats.get("avg_latency_ms", 0))
colors_list.append(color_map.get(tool_name, "#6b7280"))
if tools:
fig = go.Figure()
fig.add_trace(go.Bar(
x=tools,
y=counts,
name="Usage Count",
marker_color=colors_list,
text=counts,
textposition='outside',
hovertemplate='%{x}
Count: %{y}
Avg Latency: %{y}ms
Value: %{y:.2f}
└ {count} calls • {latency:.1f}ms avg • {success} success • {errors} errors\n"
return summary_text, tool_usage, tool_chart, latency_chart, rag_chart, error_msg
def list_documents(tenant_id: str, role: str, limit: int = 1000, offset: int = 0):
"""
List all documents for a tenant.
Returns a tuple of (status_message, documents_list, total_count, stats_dict, chart_fig).
"""
if not tenant_id or not tenant_id.strip():
return "❗ Tenant ID is required.", [], 0, {}, None
tenant_id = tenant_id.strip()
try:
headers = {
"x-tenant-id": tenant_id,
"x-user-role": role if role else DEFAULT_ROLE
}
response = requests.get(
f"{BACKEND_BASE_URL}/rag/list",
params={"tenant_id": tenant_id, "limit": limit, "offset": offset},
headers=headers,
timeout=30
)
if response.status_code == 200:
data = response.json()
documents = data.get("documents", [])
total = data.get("total", 0)
# Format documents for display and collect stats
formatted_docs = []
type_counts = Counter()
total_length = 0
for doc in documents:
doc_id = doc.get("id", "N/A")
text = doc.get("text", "")
created_at = doc.get("created_at", "")
preview = text[:200] + "..." if len(text) > 200 else text
# Detect document type
text_lower = text.lower()
if "http://" in text_lower or "https://" in text_lower or "www." in text_lower:
doc_type = "link"
elif any(x in text_lower for x in ["q:", "question:", "faq", "frequently asked"]):
doc_type = "faq"
elif ".pdf" in text_lower or "pdf document" in text_lower:
doc_type = "pdf"
else:
doc_type = "text"
type_counts[doc_type] += 1
total_length += len(text)
formatted_docs.append({
"ID": doc_id,
"Type": doc_type,
"Preview": preview,
"Length": len(text),
"Created": created_at[:10] if created_at else "N/A"
})
# Create statistics dictionary
stats = {
"total": total,
"types": dict(type_counts),
"avg_length": total_length // total if total > 0 else 0,
"total_chars": total_length
}
# Create pie chart for document types
chart_fig = None
if type_counts and PLOTLY_AVAILABLE:
try:
labels = list(type_counts.keys())
values = list(type_counts.values())
colors = {
"text": "#3b82f6", # blue
"pdf": "#ef4444", # red
"faq": "#a855f7", # purple
"link": "#06b6d4" # cyan
}
chart_colors = [colors.get(label, "#6b7280") for label in labels]
fig = go.Figure(data=[go.Pie(
labels=labels,
values=values,
hole=0.4,
marker=dict(colors=chart_colors),
textinfo='label+percent+value',
textfont=dict(size=12),
hovertemplate='%{label}
Count: %{value}
Percentage: %{percent}
Editor role can only access Document Ingestion.
Please switch to Owner or Admin role to access Chat functionality, or go to the Document Ingestion tab.
Editor role can only access Document Ingestion.
Please switch to Owner or Admin role to access Knowledge Base Library.
Analytics is available to all roles.
If you're seeing this message, there may be a configuration issue.
Editor role can only access Document Ingestion.
Admin Rules & Compliance is restricted to Admin and Owner roles only.
Built with ❤️ using Model Context Protocol (MCP)
Enterprise-Grade MCP Autonomous Agent Platform