nothingworry committed on
Commit
ef83e66
·
1 Parent(s): 67b7db4

Reasoning traces, smarter tools, deterministic backend tests.

Browse files
README.md CHANGED
@@ -357,7 +357,7 @@ Before you begin, ensure you have the following installed:
357
 
358
  Create a `.env` file in the project root with the following:
359
  ```env
360
- # Database Configuration
361
  POSTGRESQL_URL=postgresql://user:password@host:port/database
362
  SUPABASE_URL=https://your-project.supabase.co
363
  SUPABASE_SERVICE_KEY=your_service_role_key
 
357
 
358
  Create a `.env` file in the project root with the following:
359
  ```env
360
+ # Database Configuration
361
  POSTGRESQL_URL=postgresql://user:password@host:port/database
362
  SUPABASE_URL=https://your-project.supabase.co
363
  SUPABASE_SERVICE_KEY=your_service_role_key
backend/api/models/agent.py CHANGED
@@ -21,4 +21,5 @@ class AgentResponse(BaseModel):
21
  text: str
22
  decision: AgentDecision
23
  tool_traces: List[Dict[str, Any]] = []
 
24
 
 
21
  text: str
22
  decision: AgentDecision
23
  tool_traces: List[Dict[str, Any]] = []
24
+ reasoning_trace: List[Dict[str, Any]] = []
25
 
backend/api/models/redflag.py CHANGED
@@ -20,4 +20,6 @@ class RedFlagMatch:
20
  severity: str
21
  description: str
22
  matched_text: str
 
 
23
 
 
20
  severity: str
21
  description: str
22
  matched_text: str
23
+ confidence: float | None = None
24
+ explanation: str | None = None
25
 
backend/api/services/agent_orchestrator.py CHANGED
@@ -9,6 +9,7 @@ Place at: backend/api/services/agent_orchestrator.py
9
 
10
  from __future__ import annotations
11
 
 
12
  import json
13
  import os
14
  from typing import List, Dict, Any, Optional
@@ -20,6 +21,7 @@ from .intent_classifier import IntentClassifier
20
  from .tool_selector import ToolSelector
21
  from .llm_client import LLMClient
22
  from ..mcp_clients.mcp_client import MCPClient
 
23
 
24
 
25
  class AgentOrchestrator:
@@ -37,10 +39,24 @@ class AgentOrchestrator:
37
 
38
  self.intent = IntentClassifier(llm_client=self.llm)
39
  self.selector = ToolSelector(llm_client=self.llm)
 
40
 
41
  async def handle(self, req: AgentRequest) -> AgentResponse:
 
 
 
 
 
 
 
 
42
  # 1) Red-flag check (async)
43
  matches: List[RedFlagMatch] = await self.redflag.check(req.tenant_id, req.message)
 
 
 
 
 
44
 
45
  if matches:
46
  # Notify admin asynchronously (do not await blocking the response path if you prefer)
@@ -59,11 +75,16 @@ class AgentOrchestrator:
59
  return AgentResponse(
60
  text="Your request has been blocked due to policy.",
61
  decision=decision,
62
- tool_traces=[{"redflags": [m.__dict__ for m in matches]}]
 
63
  )
64
 
65
  # 2) Intent classification
66
  intent = await self.intent.classify(req.message)
 
 
 
 
67
 
68
  # 2.5) Pre-fetch RAG results if available (for tool selector context)
69
  rag_prefetch = None
@@ -73,16 +94,38 @@ class AgentOrchestrator:
73
  rag_prefetch = await self.mcp.call_rag(req.tenant_id, req.message)
74
  if isinstance(rag_prefetch, dict):
75
  rag_results = rag_prefetch.get("results") or rag_prefetch.get("hits") or []
76
- except Exception:
 
 
 
 
 
77
  # If RAG fails, continue without it
78
- pass
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  # 3) Tool selection (hybrid) - pass RAG results in context
81
  ctx = {
82
  "tenant_id": req.tenant_id,
83
- "rag_results": rag_results
 
84
  }
85
  decision = await self.selector.select(intent, req.message, ctx)
 
 
 
 
 
86
 
87
  tool_traces: List[Dict[str, Any]] = []
88
 
@@ -90,7 +133,14 @@ class AgentOrchestrator:
90
  if decision.action == "multi_step" and decision.tool_input:
91
  steps = decision.tool_input.get("steps", [])
92
  if steps:
93
- return await self._execute_multi_step(req, steps, decision, tool_traces, rag_prefetch)
 
 
 
 
 
 
 
94
 
95
  # 5) Execute single tool
96
  if decision.action == "call_tool" and decision.tool:
@@ -98,25 +148,54 @@ class AgentOrchestrator:
98
  if decision.tool == "rag":
99
  rag_resp = await self.mcp.call_rag(req.tenant_id, decision.tool_input.get("query") if decision.tool_input else req.message)
100
  tool_traces.append({"tool": "rag", "response": rag_resp})
 
 
 
 
 
 
101
  prompt = self._build_prompt_with_rag(req, rag_resp)
102
  llm_out = await self.llm.simple_call(prompt, temperature=req.temperature)
103
- return AgentResponse(text=llm_out, decision=decision, tool_traces=tool_traces)
 
 
 
 
104
 
105
  if decision.tool == "web":
106
  web_resp = await self.mcp.call_web(req.tenant_id, decision.tool_input.get("query") if decision.tool_input else req.message)
107
  tool_traces.append({"tool": "web", "response": web_resp})
 
 
 
 
 
 
108
  prompt = self._build_prompt_with_web(req, web_resp)
109
  llm_out = await self.llm.simple_call(prompt, temperature=req.temperature)
110
- return AgentResponse(text=llm_out, decision=decision, tool_traces=tool_traces)
 
 
 
 
111
 
112
  if decision.tool == "admin":
113
  admin_resp = await self.mcp.call_admin(req.tenant_id, decision.tool_input.get("query") if decision.tool_input else req.message)
114
  tool_traces.append({"tool": "admin", "response": admin_resp})
115
- return AgentResponse(text=json.dumps(admin_resp), decision=decision, tool_traces=tool_traces)
 
 
 
 
 
116
 
117
  if decision.tool == "llm":
118
  llm_out = await self.llm.simple_call(req.message, temperature=req.temperature)
119
- return AgentResponse(text=llm_out, decision=decision)
 
 
 
 
120
 
121
  except Exception as e:
122
  tool_traces.append({"tool": decision.tool, "error": str(e)})
@@ -127,7 +206,12 @@ class AgentOrchestrator:
127
  return AgentResponse(
128
  text=fallback,
129
  decision=AgentDecision(action="respond", tool=None, tool_input=None, reason=f"tool_error_fallback: {e}"),
130
- tool_traces=tool_traces
 
 
 
 
 
131
  )
132
 
133
  # Default: direct LLM response
@@ -136,10 +220,16 @@ class AgentOrchestrator:
136
  except Exception as e:
137
  # If LLM fails, return a helpful error message
138
  llm_out = f"I apologize, but I'm unable to process your request right now. The AI service is unavailable: {str(e)}"
 
 
 
 
 
139
 
140
  return AgentResponse(
141
  text=llm_out,
142
- decision=AgentDecision(action="respond", tool=None, tool_input=None, reason="default_llm")
 
143
  )
144
 
145
  def _build_prompt_with_rag(self, req: AgentRequest, rag_resp: Dict[str, Any]) -> str:
@@ -160,6 +250,7 @@ class AgentOrchestrator:
160
 
161
  async def _execute_multi_step(self, req: AgentRequest, steps: List[Dict[str, Any]],
162
  decision: AgentDecision, tool_traces: List[Dict[str, Any]],
 
163
  pre_fetched_rag: Optional[Dict[str, Any]] = None) -> AgentResponse:
164
  """
165
  Execute multiple tools in sequence and synthesize results with LLM.
@@ -169,6 +260,14 @@ class AgentOrchestrator:
169
  admin_data = None
170
  collected_data = []
171
 
 
 
 
 
 
 
 
 
172
  # Execute each step in sequence
173
  for step_info in steps:
174
  tool_name = step_info.get("tool")
@@ -178,13 +277,22 @@ class AgentOrchestrator:
178
  try:
179
  if tool_name == "rag":
180
  # Reuse pre-fetched RAG if available, otherwise fetch
181
- if pre_fetched_rag:
182
  rag_resp = pre_fetched_rag
183
  tool_traces.append({"tool": "rag", "response": rag_resp, "note": "used_pre_fetched"})
 
 
 
184
  else:
185
  rag_resp = await self.mcp.call_rag(req.tenant_id, query)
186
  tool_traces.append({"tool": "rag", "response": rag_resp})
187
  rag_data = rag_resp
 
 
 
 
 
 
188
  # Extract snippets for prompt
189
  if isinstance(rag_resp, dict):
190
  hits = rag_resp.get("results") or rag_resp.get("hits") or []
@@ -193,9 +301,19 @@ class AgentOrchestrator:
193
  collected_data.append(f"[RAG] {txt}")
194
 
195
  elif tool_name == "web":
196
- web_resp = await self.mcp.call_web(req.tenant_id, query)
197
- tool_traces.append({"tool": "web", "response": web_resp})
 
 
 
 
198
  web_data = web_resp
 
 
 
 
 
 
199
  # Extract snippets for prompt
200
  if isinstance(web_resp, dict):
201
  hits = web_resp.get("results") or web_resp.get("items") or []
@@ -210,6 +328,11 @@ class AgentOrchestrator:
210
  tool_traces.append({"tool": "admin", "response": admin_resp})
211
  admin_data = admin_resp
212
  collected_data.append(f"[ADMIN] {json.dumps(admin_resp)}")
 
 
 
 
 
213
 
214
  elif tool_name == "llm":
215
  # LLM is always last - synthesize all collected data
@@ -218,6 +341,11 @@ class AgentOrchestrator:
218
  except Exception as e:
219
  tool_traces.append({"tool": tool_name, "error": str(e)})
220
  # Continue with other tools even if one fails
 
 
 
 
 
221
 
222
  # Build comprehensive prompt with all collected data
223
  data_section = "\n---\n".join(collected_data) if collected_data else ""
@@ -241,7 +369,11 @@ class AgentOrchestrator:
241
  return AgentResponse(
242
  text=llm_out,
243
  decision=decision,
244
- tool_traces=tool_traces
 
 
 
 
245
  )
246
  except Exception as e:
247
  tool_traces.append({"tool": "llm", "error": str(e)})
@@ -254,7 +386,12 @@ class AgentOrchestrator:
254
  tool_input=None,
255
  reason=f"multi_step_llm_error: {e}"
256
  ),
257
- tool_traces=tool_traces
 
 
 
 
 
258
  )
259
 
260
  def _build_prompt_with_web(self, req: AgentRequest, web_resp: Dict[str, Any]) -> str:
@@ -273,3 +410,28 @@ class AgentOrchestrator:
273
  f"User question: {req.message}\nAnswer succinctly and indicate which results you used."
274
  )
275
  return prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  from __future__ import annotations
11
 
12
+ import asyncio
13
  import json
14
  import os
15
  from typing import List, Dict, Any, Optional
 
21
  from .tool_selector import ToolSelector
22
  from .llm_client import LLMClient
23
  from ..mcp_clients.mcp_client import MCPClient
24
+ from .tool_scoring import ToolScoringService
25
 
26
 
27
  class AgentOrchestrator:
 
39
 
40
  self.intent = IntentClassifier(llm_client=self.llm)
41
  self.selector = ToolSelector(llm_client=self.llm)
42
+ self.tool_scorer = ToolScoringService()
43
 
44
  async def handle(self, req: AgentRequest) -> AgentResponse:
45
+ reasoning_trace: List[Dict[str, Any]] = []
46
+ reasoning_trace.append({
47
+ "step": "request_received",
48
+ "tenant_id": req.tenant_id,
49
+ "user_id": req.user_id,
50
+ "message_preview": req.message[:120]
51
+ })
52
+
53
  # 1) Red-flag check (async)
54
  matches: List[RedFlagMatch] = await self.redflag.check(req.tenant_id, req.message)
55
+ reasoning_trace.append({
56
+ "step": "redflag_check",
57
+ "match_count": len(matches),
58
+ "matches": [m.__dict__ for m in matches]
59
+ })
60
 
61
  if matches:
62
  # Notify admin asynchronously (do not await blocking the response path if you prefer)
 
75
  return AgentResponse(
76
  text="Your request has been blocked due to policy.",
77
  decision=decision,
78
+ tool_traces=[{"redflags": [m.__dict__ for m in matches]}],
79
+ reasoning_trace=reasoning_trace
80
  )
81
 
82
  # 2) Intent classification
83
  intent = await self.intent.classify(req.message)
84
+ reasoning_trace.append({
85
+ "step": "intent_detection",
86
+ "intent": intent
87
+ })
88
 
89
  # 2.5) Pre-fetch RAG results if available (for tool selector context)
90
  rag_prefetch = None
 
94
  rag_prefetch = await self.mcp.call_rag(req.tenant_id, req.message)
95
  if isinstance(rag_prefetch, dict):
96
  rag_results = rag_prefetch.get("results") or rag_prefetch.get("hits") or []
97
+ reasoning_trace.append({
98
+ "step": "rag_prefetch",
99
+ "status": "ok",
100
+ "hit_count": len(rag_results)
101
+ })
102
+ except Exception as pref_err:
103
  # If RAG fails, continue without it
104
+ reasoning_trace.append({
105
+ "step": "rag_prefetch",
106
+ "status": "error",
107
+ "error": str(pref_err)
108
+ })
109
+ rag_prefetch = None
110
+
111
+ tool_scores = self.tool_scorer.score(req.message, intent, rag_results)
112
+ reasoning_trace.append({
113
+ "step": "tool_scoring",
114
+ "scores": tool_scores
115
+ })
116
 
117
  # 3) Tool selection (hybrid) - pass RAG results in context
118
  ctx = {
119
  "tenant_id": req.tenant_id,
120
+ "rag_results": rag_results,
121
+ "tool_scores": tool_scores
122
  }
123
  decision = await self.selector.select(intent, req.message, ctx)
124
+ reasoning_trace.append({
125
+ "step": "tool_selection",
126
+ "decision": decision.dict(),
127
+ "context_scores": tool_scores
128
+ })
129
 
130
  tool_traces: List[Dict[str, Any]] = []
131
 
 
133
  if decision.action == "multi_step" and decision.tool_input:
134
  steps = decision.tool_input.get("steps", [])
135
  if steps:
136
+ return await self._execute_multi_step(
137
+ req,
138
+ steps,
139
+ decision,
140
+ tool_traces,
141
+ reasoning_trace,
142
+ rag_prefetch
143
+ )
144
 
145
  # 5) Execute single tool
146
  if decision.action == "call_tool" and decision.tool:
 
148
  if decision.tool == "rag":
149
  rag_resp = await self.mcp.call_rag(req.tenant_id, decision.tool_input.get("query") if decision.tool_input else req.message)
150
  tool_traces.append({"tool": "rag", "response": rag_resp})
151
+ reasoning_trace.append({
152
+ "step": "tool_execution",
153
+ "tool": "rag",
154
+ "hit_count": len(self._extract_hits(rag_resp)),
155
+ "summary": self._summarize_hits(rag_resp, limit=2)
156
+ })
157
  prompt = self._build_prompt_with_rag(req, rag_resp)
158
  llm_out = await self.llm.simple_call(prompt, temperature=req.temperature)
159
+ reasoning_trace.append({
160
+ "step": "llm_response",
161
+ "mode": "rag_synthesis"
162
+ })
163
+ return AgentResponse(text=llm_out, decision=decision, tool_traces=tool_traces, reasoning_trace=reasoning_trace)
164
 
165
  if decision.tool == "web":
166
  web_resp = await self.mcp.call_web(req.tenant_id, decision.tool_input.get("query") if decision.tool_input else req.message)
167
  tool_traces.append({"tool": "web", "response": web_resp})
168
+ reasoning_trace.append({
169
+ "step": "tool_execution",
170
+ "tool": "web",
171
+ "hit_count": len(self._extract_hits(web_resp)),
172
+ "summary": self._summarize_hits(web_resp, limit=2)
173
+ })
174
  prompt = self._build_prompt_with_web(req, web_resp)
175
  llm_out = await self.llm.simple_call(prompt, temperature=req.temperature)
176
+ reasoning_trace.append({
177
+ "step": "llm_response",
178
+ "mode": "web_synthesis"
179
+ })
180
+ return AgentResponse(text=llm_out, decision=decision, tool_traces=tool_traces, reasoning_trace=reasoning_trace)
181
 
182
  if decision.tool == "admin":
183
  admin_resp = await self.mcp.call_admin(req.tenant_id, decision.tool_input.get("query") if decision.tool_input else req.message)
184
  tool_traces.append({"tool": "admin", "response": admin_resp})
185
+ reasoning_trace.append({
186
+ "step": "tool_execution",
187
+ "tool": "admin",
188
+ "status": "completed"
189
+ })
190
+ return AgentResponse(text=json.dumps(admin_resp), decision=decision, tool_traces=tool_traces, reasoning_trace=reasoning_trace)
191
 
192
  if decision.tool == "llm":
193
  llm_out = await self.llm.simple_call(req.message, temperature=req.temperature)
194
+ reasoning_trace.append({
195
+ "step": "llm_response",
196
+ "mode": "direct"
197
+ })
198
+ return AgentResponse(text=llm_out, decision=decision, reasoning_trace=reasoning_trace)
199
 
200
  except Exception as e:
201
  tool_traces.append({"tool": decision.tool, "error": str(e)})
 
206
  return AgentResponse(
207
  text=fallback,
208
  decision=AgentDecision(action="respond", tool=None, tool_input=None, reason=f"tool_error_fallback: {e}"),
209
+ tool_traces=tool_traces,
210
+ reasoning_trace=reasoning_trace + [{
211
+ "step": "error",
212
+ "tool": decision.tool,
213
+ "error": str(e)
214
+ }]
215
  )
216
 
217
  # Default: direct LLM response
 
220
  except Exception as e:
221
  # If LLM fails, return a helpful error message
222
  llm_out = f"I apologize, but I'm unable to process your request right now. The AI service is unavailable: {str(e)}"
223
+ reasoning_trace.append({
224
+ "step": "error",
225
+ "tool": "llm",
226
+ "error": str(e)
227
+ })
228
 
229
  return AgentResponse(
230
  text=llm_out,
231
+ decision=AgentDecision(action="respond", tool=None, tool_input=None, reason="default_llm"),
232
+ reasoning_trace=reasoning_trace
233
  )
234
 
235
  def _build_prompt_with_rag(self, req: AgentRequest, rag_resp: Dict[str, Any]) -> str:
 
250
 
251
  async def _execute_multi_step(self, req: AgentRequest, steps: List[Dict[str, Any]],
252
  decision: AgentDecision, tool_traces: List[Dict[str, Any]],
253
+ reasoning_trace: List[Dict[str, Any]],
254
  pre_fetched_rag: Optional[Dict[str, Any]] = None) -> AgentResponse:
255
  """
256
  Execute multiple tools in sequence and synthesize results with LLM.
 
260
  admin_data = None
261
  collected_data = []
262
 
263
+ parallel_tasks = {}
264
+ rag_parallel_query = self._first_query_for_tool(steps, "rag", req.message)
265
+ web_parallel_query = self._first_query_for_tool(steps, "web", req.message)
266
+ if rag_parallel_query and web_parallel_query and rag_parallel_query == web_parallel_query:
267
+ if not pre_fetched_rag:
268
+ parallel_tasks["rag"] = asyncio.create_task(self.mcp.call_rag(req.tenant_id, rag_parallel_query))
269
+ parallel_tasks["web"] = asyncio.create_task(self.mcp.call_web(req.tenant_id, web_parallel_query))
270
+
271
  # Execute each step in sequence
272
  for step_info in steps:
273
  tool_name = step_info.get("tool")
 
277
  try:
278
  if tool_name == "rag":
279
  # Reuse pre-fetched RAG if available, otherwise fetch
280
+ if pre_fetched_rag and query == rag_parallel_query:
281
  rag_resp = pre_fetched_rag
282
  tool_traces.append({"tool": "rag", "response": rag_resp, "note": "used_pre_fetched"})
283
+ elif parallel_tasks.get("rag") and query == rag_parallel_query:
284
+ rag_resp = await parallel_tasks["rag"]
285
+ tool_traces.append({"tool": "rag", "response": rag_resp, "note": "parallel"})
286
  else:
287
  rag_resp = await self.mcp.call_rag(req.tenant_id, query)
288
  tool_traces.append({"tool": "rag", "response": rag_resp})
289
  rag_data = rag_resp
290
+ reasoning_trace.append({
291
+ "step": "tool_execution",
292
+ "tool": "rag",
293
+ "hit_count": len(self._extract_hits(rag_resp)),
294
+ "summary": self._summarize_hits(rag_resp, limit=2)
295
+ })
296
  # Extract snippets for prompt
297
  if isinstance(rag_resp, dict):
298
  hits = rag_resp.get("results") or rag_resp.get("hits") or []
 
301
  collected_data.append(f"[RAG] {txt}")
302
 
303
  elif tool_name == "web":
304
+ if parallel_tasks.get("web") and query == web_parallel_query:
305
+ web_resp = await parallel_tasks["web"]
306
+ tool_traces.append({"tool": "web", "response": web_resp, "note": "parallel"})
307
+ else:
308
+ web_resp = await self.mcp.call_web(req.tenant_id, query)
309
+ tool_traces.append({"tool": "web", "response": web_resp})
310
  web_data = web_resp
311
+ reasoning_trace.append({
312
+ "step": "tool_execution",
313
+ "tool": "web",
314
+ "hit_count": len(self._extract_hits(web_resp)),
315
+ "summary": self._summarize_hits(web_resp, limit=2)
316
+ })
317
  # Extract snippets for prompt
318
  if isinstance(web_resp, dict):
319
  hits = web_resp.get("results") or web_resp.get("items") or []
 
328
  tool_traces.append({"tool": "admin", "response": admin_resp})
329
  admin_data = admin_resp
330
  collected_data.append(f"[ADMIN] {json.dumps(admin_resp)}")
331
+ reasoning_trace.append({
332
+ "step": "tool_execution",
333
+ "tool": "admin",
334
+ "status": "completed"
335
+ })
336
 
337
  elif tool_name == "llm":
338
  # LLM is always last - synthesize all collected data
 
341
  except Exception as e:
342
  tool_traces.append({"tool": tool_name, "error": str(e)})
343
  # Continue with other tools even if one fails
344
+ reasoning_trace.append({
345
+ "step": "error",
346
+ "tool": tool_name,
347
+ "error": str(e)
348
+ })
349
 
350
  # Build comprehensive prompt with all collected data
351
  data_section = "\n---\n".join(collected_data) if collected_data else ""
 
369
  return AgentResponse(
370
  text=llm_out,
371
  decision=decision,
372
+ tool_traces=tool_traces,
373
+ reasoning_trace=reasoning_trace + [{
374
+ "step": "llm_response",
375
+ "mode": "multi_step"
376
+ }]
377
  )
378
  except Exception as e:
379
  tool_traces.append({"tool": "llm", "error": str(e)})
 
386
  tool_input=None,
387
  reason=f"multi_step_llm_error: {e}"
388
  ),
389
+ tool_traces=tool_traces,
390
+ reasoning_trace=reasoning_trace + [{
391
+ "step": "error",
392
+ "tool": "llm",
393
+ "error": str(e)
394
+ }]
395
  )
396
 
397
  def _build_prompt_with_web(self, req: AgentRequest, web_resp: Dict[str, Any]) -> str:
 
410
  f"User question: {req.message}\nAnswer succinctly and indicate which results you used."
411
  )
412
  return prompt
413
+
414
+ @staticmethod
415
+ def _extract_hits(resp: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]:
416
+ if not isinstance(resp, dict):
417
+ return []
418
+ return resp.get("results") or resp.get("hits") or resp.get("items") or []
419
+
420
+ def _summarize_hits(self, resp: Optional[Dict[str, Any]], limit: int = 3) -> List[str]:
421
+ hits = self._extract_hits(resp)
422
+ summaries = []
423
+ for hit in hits[:limit]:
424
+ if isinstance(hit, dict):
425
+ snippet = hit.get("text") or hit.get("content") or hit.get("snippet") or ""
426
+ else:
427
+ snippet = str(hit)
428
+ summaries.append(snippet[:160])
429
+ return summaries
430
+
431
+ @staticmethod
432
+ def _first_query_for_tool(steps: List[Dict[str, Any]], tool_name: str, default_query: str) -> Optional[str]:
433
+ for step in steps:
434
+ if step.get("tool") == tool_name:
435
+ input_data = step.get("input") or {}
436
+ return input_data.get("query") or default_query
437
+ return None
backend/api/services/redflag_detector.py CHANGED
@@ -14,11 +14,12 @@ Enterprise RedFlagDetector
14
  import os
15
  import re
16
  import time
17
- from dataclasses import dataclass
18
  from typing import List, Dict, Any, Optional
 
19
  import httpx
20
 
21
  from ..models.redflag import RedFlagRule, RedFlagMatch
 
22
 
23
 
24
  class RedFlagDetector:
@@ -29,6 +30,7 @@ class RedFlagDetector:
29
  self.admin_mcp_url = admin_mcp_url or os.getenv("ADMIN_MCP_URL")
30
  self.cache_ttl = cache_ttl
31
  self._rules_cache: Dict[str, Dict[str, Any]] = {} # tenant_id -> {"fetched_at":ts, "rules":[...]}
 
32
  self._client = httpx.AsyncClient(timeout=15)
33
 
34
  async def _fetch_rules_from_supabase(self, tenant_id: str) -> List[RedFlagRule]:
@@ -84,6 +86,17 @@ class RedFlagDetector:
84
 
85
  rules = await self._fetch_rules_from_supabase(tenant_id)
86
  self._rules_cache[tenant_id] = {"fetched_at": now, "rules": rules}
 
 
 
 
 
 
 
 
 
 
 
87
  return rules
88
 
89
  async def check(self, tenant_id: str, text: str) -> List[RedFlagMatch]:
@@ -95,6 +108,7 @@ class RedFlagDetector:
95
  matches: List[RedFlagMatch] = []
96
 
97
  text_lower = text.lower()
 
98
 
99
  for rule in rules:
100
  if not rule.enabled:
@@ -102,12 +116,17 @@ class RedFlagDetector:
102
 
103
  matched = False
104
  matched_text = ""
 
 
 
105
 
106
  # 1) Keyword quick-check (cheap)
107
  for kw in (rule.keywords or []):
108
  if kw and kw.lower() in text_lower:
109
  matched = True
110
  matched_text = kw
 
 
111
  break
112
 
113
  # 2) Regex check (more precise)
@@ -118,10 +137,15 @@ class RedFlagDetector:
118
  if m:
119
  matched = True
120
  matched_text = m.group(0)
 
 
121
  except re.error:
122
  # invalid regex; skip this rule
123
  continue
124
 
 
 
 
125
  if matched:
126
  matches.append(
127
  RedFlagMatch(
@@ -130,6 +154,20 @@ class RedFlagDetector:
130
  severity=rule.severity,
131
  description=rule.description,
132
  matched_text=matched_text,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  )
134
  )
135
 
@@ -161,3 +199,17 @@ class RedFlagDetector:
161
 
162
  async def close(self):
163
  await self._client.aclose()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  import os
15
  import re
16
  import time
 
17
  from typing import List, Dict, Any, Optional
18
+
19
  import httpx
20
 
21
  from ..models.redflag import RedFlagRule, RedFlagMatch
22
+ from .semantic_encoder import embed_text, cosine_similarity
23
 
24
 
25
  class RedFlagDetector:
 
30
  self.admin_mcp_url = admin_mcp_url or os.getenv("ADMIN_MCP_URL")
31
  self.cache_ttl = cache_ttl
32
  self._rules_cache: Dict[str, Dict[str, Any]] = {} # tenant_id -> {"fetched_at":ts, "rules":[...]}
33
+ self._rule_embeddings: Dict[str, Dict[str, List[float]]] = {}
34
  self._client = httpx.AsyncClient(timeout=15)
35
 
36
  async def _fetch_rules_from_supabase(self, tenant_id: str) -> List[RedFlagRule]:
 
86
 
87
  rules = await self._fetch_rules_from_supabase(tenant_id)
88
  self._rules_cache[tenant_id] = {"fetched_at": now, "rules": rules}
89
+ # Pre-compute embeddings for semantic scoring
90
+ embed_map: Dict[str, List[float]] = {}
91
+ for rule in rules:
92
+ try:
93
+ text_for_embedding = " ".join(
94
+ [piece for piece in [rule.description, rule.pattern] if piece]
95
+ ).strip() or rule.id
96
+ embed_map[rule.id] = embed_text(text_for_embedding)
97
+ except Exception:
98
+ embed_map[rule.id] = []
99
+ self._rule_embeddings[tenant_id] = embed_map
100
  return rules
101
 
102
  async def check(self, tenant_id: str, text: str) -> List[RedFlagMatch]:
 
108
  matches: List[RedFlagMatch] = []
109
 
110
  text_lower = text.lower()
111
+ text_vector = embed_text(text)
112
 
113
  for rule in rules:
114
  if not rule.enabled:
 
116
 
117
  matched = False
118
  matched_text = ""
119
+ match_source = ""
120
+ keyword_score = 0.0
121
+ regex_score = 0.0
122
 
123
  # 1) Keyword quick-check (cheap)
124
  for kw in (rule.keywords or []):
125
  if kw and kw.lower() in text_lower:
126
  matched = True
127
  matched_text = kw
128
+ keyword_score = 0.8
129
+ match_source = "keyword"
130
  break
131
 
132
  # 2) Regex check (more precise)
 
137
  if m:
138
  matched = True
139
  matched_text = m.group(0)
140
+ regex_score = 1.0
141
+ match_source = "regex"
142
  except re.error:
143
  # invalid regex; skip this rule
144
  continue
145
 
146
+ semantic_score = self._semantic_score(tenant_id, rule.id, text_vector)
147
+ confidence = max(semantic_score, keyword_score, regex_score)
148
+
149
  if matched:
150
  matches.append(
151
  RedFlagMatch(
 
154
  severity=rule.severity,
155
  description=rule.description,
156
  matched_text=matched_text,
157
+ confidence=round(confidence, 2),
158
+ explanation=self._build_explanation(rule, match_source, matched_text, confidence),
159
+ )
160
+ )
161
+ elif semantic_score >= 0.82:
162
+ matches.append(
163
+ RedFlagMatch(
164
+ rule_id=rule.id,
165
+ pattern=rule.pattern,
166
+ severity=rule.severity,
167
+ description=rule.description,
168
+ matched_text=matched_text or "",
169
+ confidence=round(semantic_score, 2),
170
+ explanation=self._build_explanation(rule, "semantic", matched_text, semantic_score),
171
  )
172
  )
173
 
 
199
 
200
  async def close(self):
201
  await self._client.aclose()
202
+
203
+ def _semantic_score(self, tenant_id: str, rule_id: str, text_vector: List[float]) -> float:
204
+ rule_vectors = self._rule_embeddings.get(tenant_id, {})
205
+ rule_vector = rule_vectors.get(rule_id)
206
+ if not rule_vector:
207
+ return 0.0
208
+ return cosine_similarity(rule_vector, text_vector)
209
+
210
+ @staticmethod
211
+ def _build_explanation(rule: RedFlagRule, source: str, matched_text: str, confidence: float) -> str:
212
+ base = f"Matched rule '{rule.description or rule.id}' via {source or 'heuristics'}"
213
+ if matched_text:
214
+ base += f" on span \"{matched_text}\""
215
+ return f"{base}. confidence={round(confidence, 2)}"
backend/api/services/semantic_encoder.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Shared semantic encoding utilities for backend services.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from functools import lru_cache
8
+ from typing import Iterable, List, Optional
9
+ import hashlib
10
+
11
+ import numpy as np
12
+
13
+ try:
14
+ from sentence_transformers import SentenceTransformer
15
+ except ImportError: # pragma: no cover - optional dependency
16
+ SentenceTransformer = None
17
+
18
+
19
+ @lru_cache(maxsize=1)
20
+ def _get_model() -> Optional[SentenceTransformer]:
21
+ """
22
+ Lazily load the MiniLM encoder once per process.
23
+ """
24
+ if SentenceTransformer is None:
25
+ return None
26
+ return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
27
+
28
+
29
+ def embed_text(text: str) -> List[float]:
30
+ """
31
+ Generate an embedding for the provided text.
32
+ """
33
+ if not text:
34
+ text = ""
35
+ model = _get_model()
36
+ if model is None:
37
+ return _fallback_embed(text)
38
+ vector = model.encode(text)
39
+ return vector.tolist()
40
+
41
+
42
+ def cosine_similarity(vec_a: Iterable[float], vec_b: Iterable[float]) -> float:
43
+ a = np.array(list(vec_a), dtype=float)
44
+ b = np.array(list(vec_b), dtype=float)
45
+ denom = (np.linalg.norm(a) * np.linalg.norm(b))
46
+ if denom == 0:
47
+ return 0.0
48
+ return float(np.dot(a, b) / denom)
49
+
50
+
51
+ def _fallback_embed(text: str, dim: int = 64) -> List[float]:
52
+ """
53
+ Deterministic hashing-based embedding used when sentence-transformers
54
+ is not available (e.g., during slim CI environments).
55
+ """
56
+ vector = [0.0] * dim
57
+ for token in text.lower().split():
58
+ digest = hashlib.sha256(token.encode("utf-8")).hexdigest()
59
+ idx = int(digest, 16) % dim
60
+ vector[idx] += 1.0
61
+ return vector
62
+
backend/api/services/tool_scoring.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, List
5
+
6
+ from .semantic_encoder import embed_text, cosine_similarity
7
+
8
+
9
def _normalize(score: float) -> float:
    """Clamp *score* to the closed interval [0.0, 1.0]."""
    if score < 0.0:
        return 0.0
    return min(1.0, score)
11
+
12
+
13
@dataclass
class ToolScoringService:
    """
    Heuristic + semantic tool fitness scoring.

    Scores how well each backend tool (RAG, web search, plain LLM) fits a
    user message by blending embedding similarity against per-tool domain
    prompts with cheap intent/freshness heuristics. All scores are clamped
    to [0, 1].
    """

    # One short description of each tool's "home domain"; embedded once per
    # instance in __post_init__.
    _domain_prompts: Dict[str, str] = field(default_factory=lambda: {
        "rag": "internal company policy, handbook, corporate procedure, proprietary",
        "web": "latest external news, public web search, trending topics, live data",
        "llm": "casual chit chat, brainstorming, creative writing, general knowledge"
    })
    _domain_vectors: Dict[str, List[float]] = field(init=False)

    def __post_init__(self):
        # Pre-compute the domain embeddings so score() only embeds the message.
        self._domain_vectors = {
            name: embed_text(prompt)
            for name, prompt in self._domain_prompts.items()
        }

    def score(self, message: str, intent: str, rag_results: List[Dict]) -> Dict[str, float]:
        """
        Return fitness scores for routing: rag_fitness, web_fitness, llm_only.

        *rag_results* is the (possibly empty) pre-fetched RAG hit list; its
        mere presence is a strong signal that internal knowledge applies.
        """
        embedding = embed_text(message)
        rag_sem = cosine_similarity(embedding, self._domain_vectors["rag"])
        web_sem = cosine_similarity(embedding, self._domain_vectors["web"])
        llm_sem = cosine_similarity(embedding, self._domain_vectors["llm"])

        # Weighted blends of semantic similarity with intent/freshness signals.
        rag_signal = 0.4 * rag_sem + 0.4 * (1 if rag_results else 0) + 0.2 * (1 if intent == "rag" else 0)
        web_signal = 0.5 * web_sem + 0.3 * (1 if intent == "web" else 0) + 0.2 * self._freshness_signal(message)
        llm_signal = 0.6 * llm_sem + 0.4 * (1 if intent == "general" else 0)

        return {
            "rag_fitness": round(_normalize(rag_signal), 3),
            "web_fitness": round(_normalize(web_signal), 3),
            "llm_only": round(_normalize(llm_signal), 3)
        }

    @staticmethod
    def _freshness_signal(message: str) -> float:
        """
        Fraction (capped at 1.0) of recency keywords present in the message.

        Matches whole words only: the previous substring check made "now"
        match inside "know"/"snowfall", inflating web-freshness scores.
        """
        tokens = ("news", "today", "latest", "current", "breaking", "update", "recent", "now")
        words = {w.strip(".,!?;:'\"()") for w in message.lower().split()}
        hits = sum(1 for token in tokens if token in words)
        return min(1.0, hits / 3.0)
backend/api/services/tool_selector.py CHANGED
@@ -10,6 +10,10 @@ class ToolSelector:
10
 
11
  async def select(self, intent: str, text: str, ctx):
12
  msg = text.lower().strip()
 
 
 
 
13
 
14
  # ---------------------------------
15
  # 1. Detect ADMIN RULES FIRST
@@ -35,9 +39,9 @@ class ToolSelector:
35
  r"company", r"internal", r"documentation", r"our ", r"your ",
36
  r"knowledge base", r"private", r"internal docs", r"corporate"
37
  ]
38
- if rag_has_data or any(re.search(p, msg) for p in rag_patterns):
39
  needs_rag = True
40
- if rag_has_data:
41
  steps.append(step("rag", {"query": text}))
42
 
43
  # ---------------------------------
@@ -48,7 +52,7 @@ class ToolSelector:
48
  r"tell me about ", r"define ", r"explain ",
49
  r"history of ", r"information about", r"details about"
50
  ]
51
- if any(re.search(p, msg) for p in fact_patterns):
52
  needs_web = True
53
  steps.append(step("web", {"query": text}))
54
 
@@ -88,6 +92,7 @@ TOOLS:
88
  Current context:
89
  - RAG available: {rag_has_data}
90
  - User message: "{text}"
 
91
 
92
  Determine which tools are needed. You can select:
93
  - Just LLM (simple questions)
@@ -140,7 +145,7 @@ Only return the JSON array. Do not include markdown formatting.
140
 
141
  # Build reason string showing the tool sequence
142
  tool_names = [s["tool"] for s in steps]
143
- reason = f"multi-tool plan: {' → '.join(tool_names)}"
144
 
145
  return _multi_step(steps, reason)
146
 
 
10
 
11
  async def select(self, intent: str, text: str, ctx):
12
  msg = text.lower().strip()
13
+ tool_scores = ctx.get("tool_scores", {})
14
+ rag_score = tool_scores.get("rag_fitness", 0.0)
15
+ web_score = tool_scores.get("web_fitness", 0.0)
16
+ llm_score = tool_scores.get("llm_only", 0.0)
17
 
18
  # ---------------------------------
19
  # 1. Detect ADMIN RULES FIRST
 
39
  r"company", r"internal", r"documentation", r"our ", r"your ",
40
  r"knowledge base", r"private", r"internal docs", r"corporate"
41
  ]
42
+ if rag_has_data or rag_score >= 0.55 or any(re.search(p, msg) for p in rag_patterns):
43
  needs_rag = True
44
+ if not any(s["tool"] == "rag" for s in steps):
45
  steps.append(step("rag", {"query": text}))
46
 
47
  # ---------------------------------
 
52
  r"tell me about ", r"define ", r"explain ",
53
  r"history of ", r"information about", r"details about"
54
  ]
55
+ if web_score >= 0.55 or any(re.search(p, msg) for p in fact_patterns):
56
  needs_web = True
57
  steps.append(step("web", {"query": text}))
58
 
 
92
  Current context:
93
  - RAG available: {rag_has_data}
94
  - User message: "{text}"
95
+ - Tool scores: {json.dumps(tool_scores)}
96
 
97
  Determine which tools are needed. You can select:
98
  - Just LLM (simple questions)
 
145
 
146
  # Build reason string showing the tool sequence
147
  tool_names = [s["tool"] for s in steps]
148
+ reason = f"multi-tool plan: {' → '.join(tool_names)} | scores={tool_scores}"
149
 
150
  return _multi_step(steps, reason)
151
 
backend/mcp_servers/database.py CHANGED
@@ -132,7 +132,7 @@ def insert_document_chunks(tenant_id: str, text: str, embedding: list):
132
  raise
133
 
134
 
135
- def search_vectors(tenant_id: str, vector: list, limit: int = 5) -> List[str]:
136
  """
137
  Perform semantic vector search using pgvector.
138
  """
@@ -158,7 +158,15 @@ def search_vectors(tenant_id: str, vector: list, limit: int = 5) -> List[str]:
158
  cur.close()
159
  conn.close()
160
 
161
- return [row["chunk_text"] for row in rows]
 
 
 
 
 
 
 
 
162
 
163
  except Exception as e:
164
  print("DB SEARCH ERROR:", e)
 
132
  raise
133
 
134
 
135
+ def search_vectors(tenant_id: str, vector: list, limit: int = 5) -> List[Dict[str, Any]]:
136
  """
137
  Perform semantic vector search using pgvector.
138
  """
 
158
  cur.close()
159
  conn.close()
160
 
161
+ results: List[Dict[str, Any]] = []
162
+ for row in rows:
163
+ results.append(
164
+ {
165
+ "text": row["chunk_text"],
166
+ "similarity": float(row.get("similarity", 0.0)),
167
+ }
168
+ )
169
+ return results
170
 
171
  except Exception as e:
172
  print("DB SEARCH ERROR:", e)
backend/mcp_servers/rag_server.py CHANGED
@@ -11,6 +11,8 @@ import os
11
  current_dir = os.path.dirname(__file__)
12
  sys.path.insert(0, current_dir)
13
 
 
 
14
  from embeddings import embed_text
15
  from database import insert_document_chunks, search_vectors
16
  from models.rag import IngestRequest, SearchRequest
@@ -47,11 +49,42 @@ async def ingest(req: IngestRequest):
47
  return {"status": "ok"}
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  @rag_app.post("/search")
51
  async def search(req: SearchRequest):
52
  vector = embed_text(req.query)
53
  results = db_search(req.tenant_id, vector)
54
- return {"results": results}
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  if __name__ == "__main__":
 
11
  current_dir = os.path.dirname(__file__)
12
  sys.path.insert(0, current_dir)
13
 
14
+ from typing import Any, Dict, List
15
+
16
  from embeddings import embed_text
17
  from database import insert_document_chunks, search_vectors
18
  from models.rag import IngestRequest, SearchRequest
 
49
  return {"status": "ok"}
50
 
51
 
52
def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
    """Cosine similarity of two vectors.

    Returns 0.0 for empty input or when either vector has zero magnitude,
    so callers never see a ZeroDivisionError.
    """
    import math

    if not vec_a or not vec_b:
        return 0.0
    dot = sum(x * y for x, y in zip(vec_a, vec_b))
    magnitude = math.sqrt(sum(x * x for x in vec_a)) * math.sqrt(sum(y * y for y in vec_b))
    if magnitude == 0:
        return 0.0
    return dot / magnitude
62
+
63
+
64
def rank_chunks(chunks: List[Dict[str, Any]], query_embedding: List[float]) -> List[Dict[str, Any]]:
    """Score each chunk against the query embedding and return them sorted by relevance.

    Fix: the original wrote ``relevance`` into the caller-owned dicts in place;
    we now score shallow copies so the input list is left untouched.

    Args:
        chunks: Retrieved chunk dicts; each is expected to carry a ``text`` key.
        query_embedding: Embedding vector of the user query.

    Returns:
        New chunk dicts (with a ``relevance`` float added), highest relevance first.
    """
    ranked: List[Dict[str, Any]] = []
    for chunk in chunks:
        scored = dict(chunk)  # shallow copy: don't mutate the caller's data
        # NOTE: re-embeds chunk text per query; acceptable for small result sets.
        chunk_vector = embed_text(scored.get("text", ""))
        scored["relevance"] = cosine_similarity(chunk_vector, query_embedding)
        ranked.append(scored)
    ranked.sort(key=lambda c: c["relevance"], reverse=True)
    return ranked
72
+
73
+
74
# Relevance cutoff and result cap for /search responses — single source of truth
# (the original repeated the 0.55 literal in both the filter and the metadata).
SEARCH_RELEVANCE_THRESHOLD = 0.55
SEARCH_TOP_K = 3


@rag_app.post("/search")
async def search(req: SearchRequest):
    """Semantic search: embed the query, retrieve, re-rank, then threshold-filter.

    Returns at most SEARCH_TOP_K chunks whose relevance clears the threshold,
    plus metadata so callers can see how aggressively results were filtered.
    """
    query_vector = embed_text(req.query)
    # NOTE(review): assumes db_search is an alias of search_vectors defined
    # elsewhere in this module — confirm; only search_vectors is imported here.
    retrieved = db_search(req.tenant_id, query_vector)
    ranked = rank_chunks(retrieved, query_vector)
    filtered = [
        chunk for chunk in ranked
        if chunk["relevance"] >= SEARCH_RELEVANCE_THRESHOLD
    ][:SEARCH_TOP_K]
    return {
        "results": filtered,
        "metadata": {
            "total_retrieved": len(retrieved),
            "returned": len(filtered),
            "threshold": SEARCH_RELEVANCE_THRESHOLD,
        },
    }
88
 
89
 
90
  if __name__ == "__main__":
backend/tests/test_agent_orchestrator.py CHANGED
@@ -63,6 +63,11 @@ class FakeMCP:
63
  return {"action": "allow"}
64
 
65
 
 
 
 
 
 
66
  # ---------------------------
67
  # Patch orchestrator to use fake MCP + fake redflag
68
  # ---------------------------
@@ -135,6 +140,7 @@ async def test_block_on_redflag(orchestrator):
135
  assert resp.decision.action == "block"
136
  assert resp.decision.tool == "admin"
137
  assert "salary" in resp.tool_traces[0]["redflags"][0]["matched_text"]
 
138
 
139
 
140
  @pytest.mark.asyncio
@@ -158,9 +164,10 @@ async def test_rag_tool_path(orchestrator, monkeypatch):
158
 
159
  resp = await orchestrator.handle(req)
160
 
161
- assert resp.decision.tool == "rag"
162
- assert "RAG_DOC_CONTENT" in resp.tool_traces[0]["response"]["results"][0]["text"]
163
  assert resp.text == "MOCK_ANSWER"
 
164
 
165
 
166
  @pytest.mark.asyncio
@@ -184,8 +191,10 @@ async def test_web_tool_path(orchestrator, monkeypatch):
184
 
185
  resp = await orchestrator.handle(req)
186
 
187
- assert resp.decision.tool == "web"
 
188
  assert resp.text == "MOCK_ANSWER"
 
189
 
190
 
191
  @pytest.mark.asyncio
@@ -218,3 +227,4 @@ async def test_default_llm_path(orchestrator, monkeypatch):
218
  assert resp.decision.action == "respond"
219
  assert resp.decision.tool is None
220
  assert resp.text == "MOCK_ANSWER"
 
 
63
  return {"action": "allow"}
64
 
65
 
66
def assert_trace_has_step(resp, step_name):
    """Fail unless the response carries a reasoning trace containing *step_name*."""
    trace = resp.reasoning_trace
    assert trace, "reasoning trace missing"
    step_names = [entry.get("step") for entry in trace]
    assert step_name in step_names, f"{step_name} missing"
69
+
70
+
71
  # ---------------------------
72
  # Patch orchestrator to use fake MCP + fake redflag
73
  # ---------------------------
 
140
  assert resp.decision.action == "block"
141
  assert resp.decision.tool == "admin"
142
  assert "salary" in resp.tool_traces[0]["redflags"][0]["matched_text"]
143
+ assert_trace_has_step(resp, "redflag_check")
144
 
145
 
146
  @pytest.mark.asyncio
 
164
 
165
  resp = await orchestrator.handle(req)
166
 
167
+ assert resp.decision.action == "multi_step"
168
+ assert any(trace["tool"] == "rag" for trace in resp.tool_traces if trace.get("tool") == "rag")
169
  assert resp.text == "MOCK_ANSWER"
170
+ assert_trace_has_step(resp, "tool_selection")
171
 
172
 
173
  @pytest.mark.asyncio
 
191
 
192
  resp = await orchestrator.handle(req)
193
 
194
+ assert resp.decision.action == "multi_step"
195
+ assert any(trace["tool"] == "web" for trace in resp.tool_traces if trace.get("tool") == "web")
196
  assert resp.text == "MOCK_ANSWER"
197
+ assert_trace_has_step(resp, "tool_selection")
198
 
199
 
200
  @pytest.mark.asyncio
 
227
  assert resp.decision.action == "respond"
228
  assert resp.decision.tool is None
229
  assert resp.text == "MOCK_ANSWER"
230
+ assert_trace_has_step(resp, "intent_detection")
frontend/.gitignore ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+ /.pnp
6
+ .pnp.*
7
+ .yarn/*
8
+ !.yarn/patches
9
+ !.yarn/plugins
10
+ !.yarn/releases
11
+ !.yarn/versions
12
+
13
+ # testing
14
+ /coverage
15
+
16
+ # next.js
17
+ /.next/
18
+ /out/
19
+
20
+ # production
21
+ /build
22
+
23
+ # misc
24
+ .DS_Store
25
+ *.pem
26
+
27
+ # debug
28
+ npm-debug.log*
29
+ yarn-debug.log*
30
+ yarn-error.log*
31
+ .pnpm-debug.log*
32
+
33
+ # env files (can opt-in for committing if needed)
34
+ .env*
35
+
36
+ # vercel
37
+ .vercel
38
+
39
+ # typescript
40
+ *.tsbuildinfo
41
+ next-env.d.ts
frontend/README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
2
+
3
+ ## Getting Started
4
+
5
+ First, run the development server:
6
+
7
+ ```bash
8
+ npm run dev
9
+ # or
10
+ yarn dev
11
+ # or
12
+ pnpm dev
13
+ # or
14
+ bun dev
15
+ ```
16
+
17
+ Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
18
+
19
+ You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
20
+
21
+ This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
22
+
23
+ ## Learn More
24
+
25
+ To learn more about Next.js, take a look at the following resources:
26
+
27
+ - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
28
+ - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
29
+
30
+ You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
31
+
32
+ ## Deploy on Vercel
33
+
34
+ The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
35
+
36
+ Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
frontend/app/favicon.ico ADDED
frontend/app/globals.css ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import "tailwindcss";
2
+
3
+ :root {
4
+ --background: #ffffff;
5
+ --foreground: #171717;
6
+ }
7
+
8
+ @theme inline {
9
+ --color-background: var(--background);
10
+ --color-foreground: var(--foreground);
11
+ --font-sans: var(--font-geist-sans);
12
+ --font-mono: var(--font-geist-mono);
13
+ }
14
+
15
+ @media (prefers-color-scheme: dark) {
16
+ :root {
17
+ --background: #0a0a0a;
18
+ --foreground: #ededed;
19
+ }
20
+ }
21
+
22
+ body {
23
+ background: var(--background);
24
+ color: var(--foreground);
25
+ font-family: Arial, Helvetica, sans-serif;
26
+ }
frontend/app/layout.tsx ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Metadata } from "next";
2
+ import { Geist, Geist_Mono } from "next/font/google";
3
+ import "./globals.css";
4
+
5
+ const geistSans = Geist({
6
+ variable: "--font-geist-sans",
7
+ subsets: ["latin"],
8
+ });
9
+
10
+ const geistMono = Geist_Mono({
11
+ variable: "--font-geist-mono",
12
+ subsets: ["latin"],
13
+ });
14
+
15
+ export const metadata: Metadata = {
16
+ title: "Create Next App",
17
+ description: "Generated by create next app",
18
+ };
19
+
20
+ export default function RootLayout({
21
+ children,
22
+ }: Readonly<{
23
+ children: React.ReactNode;
24
+ }>) {
25
+ return (
26
+ <html lang="en">
27
+ <body
28
+ className={`${geistSans.variable} ${geistMono.variable} antialiased`}
29
+ >
30
+ {children}
31
+ </body>
32
+ </html>
33
+ );
34
+ }
frontend/app/page.tsx ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Image from "next/image";
2
+
3
+ export default function Home() {
4
+ return (
5
+ <div className="flex min-h-screen items-center justify-center bg-zinc-50 font-sans dark:bg-black">
6
+ <main className="flex min-h-screen w-full max-w-3xl flex-col items-center justify-between py-32 px-16 bg-white dark:bg-black sm:items-start">
7
+ <Image
8
+ className="dark:invert"
9
+ src="/next.svg"
10
+ alt="Next.js logo"
11
+ width={100}
12
+ height={20}
13
+ priority
14
+ />
15
+ <div className="flex flex-col items-center gap-6 text-center sm:items-start sm:text-left">
16
+ <h1 className="max-w-xs text-3xl font-semibold leading-10 tracking-tight text-black dark:text-zinc-50">
17
+ To get started, edit the page.tsx file.
18
+ </h1>
19
+ <p className="max-w-md text-lg leading-8 text-zinc-600 dark:text-zinc-400">
20
+ Looking for a starting point or more instructions? Head over to{" "}
21
+ <a
22
+ href="https://vercel.com/templates?framework=next.js&utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
23
+ className="font-medium text-zinc-950 dark:text-zinc-50"
24
+ >
25
+ Templates
26
+ </a>{" "}
27
+ or the{" "}
28
+ <a
29
+ href="https://nextjs.org/learn?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
30
+ className="font-medium text-zinc-950 dark:text-zinc-50"
31
+ >
32
+ Learning
33
+ </a>{" "}
34
+ center.
35
+ </p>
36
+ </div>
37
+ <div className="flex flex-col gap-4 text-base font-medium sm:flex-row">
38
+ <a
39
+ className="flex h-12 w-full items-center justify-center gap-2 rounded-full bg-foreground px-5 text-background transition-colors hover:bg-[#383838] dark:hover:bg-[#ccc] md:w-[158px]"
40
+ href="https://vercel.com/new?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
41
+ target="_blank"
42
+ rel="noopener noreferrer"
43
+ >
44
+ <Image
45
+ className="dark:invert"
46
+ src="/vercel.svg"
47
+ alt="Vercel logomark"
48
+ width={16}
49
+ height={16}
50
+ />
51
+ Deploy Now
52
+ </a>
53
+ <a
54
+ className="flex h-12 w-full items-center justify-center rounded-full border border-solid border-black/[.08] px-5 transition-colors hover:border-transparent hover:bg-black/[.04] dark:border-white/[.145] dark:hover:bg-[#1a1a1a] md:w-[158px]"
55
+ href="https://nextjs.org/docs?utm_source=create-next-app&utm_medium=appdir-template-tw&utm_campaign=create-next-app"
56
+ target="_blank"
57
+ rel="noopener noreferrer"
58
+ >
59
+ Documentation
60
+ </a>
61
+ </div>
62
+ </main>
63
+ </div>
64
+ );
65
+ }
frontend/eslint.config.mjs ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig, globalIgnores } from "eslint/config";
2
+ import nextVitals from "eslint-config-next/core-web-vitals";
3
+ import nextTs from "eslint-config-next/typescript";
4
+
5
+ const eslintConfig = defineConfig([
6
+ ...nextVitals,
7
+ ...nextTs,
8
+ // Override default ignores of eslint-config-next.
9
+ globalIgnores([
10
+ // Default ignores of eslint-config-next:
11
+ ".next/**",
12
+ "out/**",
13
+ "build/**",
14
+ "next-env.d.ts",
15
+ ]),
16
+ ]);
17
+
18
+ export default eslintConfig;
frontend/next.config.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import type { NextConfig } from "next";
2
+
3
+ const nextConfig: NextConfig = {
4
+ /* config options here */
5
+ };
6
+
7
+ export default nextConfig;
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "frontend",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start",
9
+ "lint": "eslint"
10
+ },
11
+ "dependencies": {
12
+ "next": "16.0.3",
13
+ "react": "19.2.0",
14
+ "react-dom": "19.2.0"
15
+ },
16
+ "devDependencies": {
17
+ "@tailwindcss/postcss": "^4",
18
+ "@types/node": "^20",
19
+ "@types/react": "^19",
20
+ "@types/react-dom": "^19",
21
+ "eslint": "^9",
22
+ "eslint-config-next": "16.0.3",
23
+ "tailwindcss": "^4",
24
+ "typescript": "^5"
25
+ }
26
+ }
frontend/postcss.config.mjs ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ const config = {
2
+ plugins: {
3
+ "@tailwindcss/postcss": {},
4
+ },
5
+ };
6
+
7
+ export default config;
frontend/public/file.svg ADDED
frontend/public/globe.svg ADDED
frontend/public/next.svg ADDED
frontend/public/vercel.svg ADDED
frontend/public/window.svg ADDED
frontend/tsconfig.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2017",
4
+ "lib": ["dom", "dom.iterable", "esnext"],
5
+ "allowJs": true,
6
+ "skipLibCheck": true,
7
+ "strict": true,
8
+ "noEmit": true,
9
+ "esModuleInterop": true,
10
+ "module": "esnext",
11
+ "moduleResolution": "bundler",
12
+ "resolveJsonModule": true,
13
+ "isolatedModules": true,
14
+ "jsx": "react-jsx",
15
+ "incremental": true,
16
+ "plugins": [
17
+ {
18
+ "name": "next"
19
+ }
20
+ ],
21
+ "paths": {
22
+ "@/*": ["./*"]
23
+ }
24
+ },
25
+ "include": [
26
+ "next-env.d.ts",
27
+ "**/*.ts",
28
+ "**/*.tsx",
29
+ ".next/types/**/*.ts",
30
+ ".next/dev/types/**/*.ts",
31
+ "**/*.mts"
32
+ ],
33
+ "exclude": ["node_modules"]
34
+ }
test_all.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Single-file test suite for IntegraChat backend (unit + integration + simulation).
3
+ This version aligns with the current backend API surface.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import List, Dict
12
+
13
+ import pytest
14
+ from fastapi.testclient import TestClient
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Ensure backend package is importable
19
+ # ---------------------------------------------------------------------------
20
+ PROJECT_ROOT = Path(__file__).resolve().parent
21
+ if str(PROJECT_ROOT) not in sys.path:
22
+ sys.path.insert(0, str(PROJECT_ROOT))
23
+ backend_path = PROJECT_ROOT / "backend"
24
+ if str(backend_path) not in sys.path:
25
+ sys.path.insert(0, str(backend_path))
26
+
27
+
28
+ # ---------------------------------------------------------------------------
29
+ # Shared fixtures
30
+ # ---------------------------------------------------------------------------
31
+
32
@pytest.fixture(autouse=True, scope="session")
def set_test_env():
    """Seed service URLs and LLM config so tests never reach real endpoints.

    Uses setdefault so values already exported by the caller's shell win.
    """
    defaults = {
        "RAG_MCP_URL": "http://mock-rag",
        "WEB_MCP_URL": "http://mock-web",
        "ADMIN_MCP_URL": "http://mock-admin",
        "OLLAMA_URL": "http://localhost:11434",
        "OLLAMA_MODEL": "llama3",
        "LLM_BACKEND": "ollama",
    }
    for key, value in defaults.items():
        os.environ.setdefault(key, value)
40
+
41
+
42
@pytest.fixture
def mock_backend_dependencies(monkeypatch):
    """Patch MCP client calls and red-flag detector for deterministic tests.

    Fix: a debug ``print`` preceded the docstring, demoting it to a plain
    expression statement; the print is removed and the docstring restored to
    the first statement so introspection tools see it.
    """
    from backend.api.models.redflag import RedFlagMatch
    from backend.api.services.tool_scoring import ToolScoringService
    import types

    # Deterministic RAG payload mirroring the real /search response shape.
    async def fake_call_rag(self, tenant_id: str, query: str) -> Dict:
        return {
            "results": [
                {"text": "HR policy includes onboarding, leave rules.", "relevance": 0.92},
                {"text": "General company announcement", "relevance": 0.42}
            ],
            "metadata": {"total_retrieved": 2, "returned": 2, "threshold": 0.55}
        }

    async def fake_call_web(self, tenant_id: str, query: str) -> Dict:
        return {
            "results": [
                {"title": "Latest inflation update", "snippet": "Inflation is 3.2%", "url": "https://example.com"},
                {"title": "Global news", "snippet": "Market highlights", "url": "https://news.example.com"}
            ]
        }

    async def fake_call_admin(self, tenant_id: str, query: str) -> Dict:
        return {"status": "ok", "tenant_id": tenant_id, "query": query}

    monkeypatch.setattr("backend.api.mcp_clients.mcp_client.MCPClient.call_rag", fake_call_rag)
    monkeypatch.setattr("backend.api.mcp_clients.mcp_client.MCPClient.call_web", fake_call_web)
    monkeypatch.setattr("backend.api.mcp_clients.mcp_client.MCPClient.call_admin", fake_call_admin)

    # Only deletion requests trip the fake detector.
    async def fake_redflag_check(self, tenant_id: str, text: str) -> List[RedFlagMatch]:
        if "delete" in text.lower():
            return [
                RedFlagMatch(
                    rule_id="1",
                    pattern="delete",
                    severity="high",
                    description="Deletion request",
                    matched_text="delete",
                    confidence=0.9,
                    explanation="Matched on keyword 'delete'"
                )
            ]
        return []

    async def fake_notify(self, tenant_id, violations, source_payload=None):
        return None

    monkeypatch.setattr("backend.api.services.redflag_detector.RedFlagDetector.check", fake_redflag_check)
    monkeypatch.setattr("backend.api.services.redflag_detector.RedFlagDetector.notify_admin", fake_notify)

    # Fixed scores keep tool selection deterministic across test runs.
    def fake_score(self, message: str, intent: str, rag_results: List[Dict]) -> Dict[str, float]:
        return {"rag_fitness": 0.82, "web_fitness": 0.78, "llm_only": 0.25}

    monkeypatch.setattr(ToolScoringService, "score", fake_score)

    # Ensure already-instantiated orchestrator uses the same patches
    from backend.api.routes import agent as agent_routes

    agent_routes.orchestrator.mcp.call_rag = types.MethodType(fake_call_rag, agent_routes.orchestrator.mcp)
    agent_routes.orchestrator.mcp.call_web = types.MethodType(fake_call_web, agent_routes.orchestrator.mcp)
    agent_routes.orchestrator.mcp.call_admin = types.MethodType(fake_call_admin, agent_routes.orchestrator.mcp)
    agent_routes.orchestrator.redflag.check = types.MethodType(fake_redflag_check, agent_routes.orchestrator.redflag)
    agent_routes.orchestrator.redflag.notify_admin = types.MethodType(fake_notify, agent_routes.orchestrator.redflag)
108
+
109
+
110
@pytest.fixture
def api_client(mock_backend_dependencies):
    """FastAPI test client with all backend dependencies already faked."""
    from backend.api.main import app

    return TestClient(app)
114
+
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # Unit tests
118
+ # ---------------------------------------------------------------------------
119
+
120
@pytest.mark.asyncio
async def test_redflag_detector():
    """Detector flags 'salary' via a pre-seeded rule cache (no Supabase I/O)."""
    import time
    from backend.api.services.redflag_detector import RedFlagDetector
    from backend.api.models.redflag import RedFlagRule
    from backend.api.services.semantic_encoder import embed_text

    detector = RedFlagDetector(supabase_url="http://fake", supabase_key="fake")
    salary_rule = RedFlagRule(
        id="rule-salary",
        pattern="salary",
        description="Salary access",
        severity="high",
        source="test",
        enabled=True,
        keywords=["salary"]
    )
    # Seed the private caches directly so check() never reaches out to Supabase.
    detector._rules_cache["tenant-x"] = {"fetched_at": int(time.time()), "rules": [salary_rule]}
    detector._rule_embeddings["tenant-x"] = {salary_rule.id: embed_text("salary access")}

    matches = await detector.check("tenant-x", "Show me employee salary details")

    assert matches
    top_match = matches[0]
    assert top_match.matched_text.lower() == "salary"
    assert top_match.confidence is not None
145
+
146
+
147
def test_tool_scoring():
    """A fresh, web-flavored query should score web above rag."""
    from backend.api.services.tool_scoring import ToolScoringService

    scores = ToolScoringService().score("What is inflation today?", intent="web", rag_results=[])

    assert set(scores) == {"rag_fitness", "web_fitness", "llm_only"}
    assert scores["web_fitness"] > scores["rag_fitness"]
155
+
156
+
157
@pytest.mark.asyncio
async def test_tool_selector():
    """A mixed internal+external query plans rag first, includes web, ends with llm."""
    from backend.api.services.tool_selector import ToolSelector

    decision = await ToolSelector().select(
        intent="rag",
        text="Tell me HR policy and compare with external news",
        ctx={"rag_results": [{"text": "Policy"}], "tool_scores": {"rag_fitness": 0.9, "web_fitness": 0.8}}
    )

    planned = decision.tool_input["steps"]
    tool_order = [entry["tool"] for entry in planned]
    assert tool_order[0] == "rag"
    assert "web" in tool_order
    assert tool_order[-1] == "llm"
172
+
173
+
174
def test_reasoning_trace_via_response(api_client):
    """Every agent response should carry a reasoning trace with an intent step."""
    res = api_client.post(
        "/agent/message",
        json={"tenant_id": "tenant1", "message": "Summarize our HR policies"},
    )
    data = res.json()

    trace = data["reasoning_trace"]
    assert trace
    assert "intent_detection" in [entry["step"] for entry in trace]
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # Integration tests
186
+ # ---------------------------------------------------------------------------
187
+
188
def test_full_agent_pipeline(api_client):
    """End-to-end request answers and records >= 3 reasoning steps, one of them rag."""
    response = api_client.post(
        "/agent/message",
        json={"tenant_id": "tenant123", "message": "What are our HR policies and latest updates?"},
    )
    data = response.json()

    assert data["text"]
    trace = data["reasoning_trace"]
    assert len(trace) >= 3

    assert any(entry.get("tool") == "rag" for entry in trace), "expected rag tool execution in reasoning trace"
198
+
199
+
200
def test_parallel_execution_detected(api_client):
    """A combined internal+news query should exercise both the rag and web tools."""
    data = api_client.post(
        "/agent/message",
        json={"tenant_id": "t1", "message": "Summarize HR policies and latest news updates"},
    ).json()

    tools_used = {trace.get("tool") for trace in data["tool_traces"] if trace.get("tool")}
    assert {"rag", "web"} <= tools_used
207
+
208
+
209
+ # ---------------------------------------------------------------------------
210
+ # Simulation tests
211
+ # ---------------------------------------------------------------------------
212
+
213
# Representative user queries spanning web, RAG, destructive, and general intents.
SIM_QUERIES = [
    "What is the inflation rate today?",
    "Summarize our HR policies",
    "Delete all records",
    "Explain our refund policy",
    "How many employees are in the company?"
]


@pytest.mark.parametrize("message", SIM_QUERIES)
def test_agent_simulation(api_client, message):
    """Smoke-test a spread of user queries; destructive ones must be escalated."""
    data = api_client.post("/agent/message", json={"tenant_id": "demo", "message": message}).json()

    assert data["text"]
    assert data["reasoning_trace"]

    if "delete" not in message.lower():
        return
    # Destructive requests must be blocked or routed through an admin/red-flag path.
    assert data["decision"]["action"] in {"block", "multi_step"}
    reason = (data["decision"]["reason"] or "").lower()
    assert "admin" in reason or "redflag" in reason