Spaces:

nothingworry
/

IntegraChat

Sleeping

App Files Files Community

nothingworry committed 15 days ago

Commit

0e8c152

1 Parent(s): fe818bb

feat: update the encoding model

Browse files

Files changed (5) hide show

backend/mcp_server/common/reranker.py +111 -0
backend/mcp_server/rag/search.py +54 -14
frontend/app/admin-rules/page.tsx +38 -38
frontend/components/knowledge-base-panel.tsx +13 -5
test_reranking.py +197 -0

backend/mcp_server/common/reranker.py ADDED Viewed

	@@ -0,0 +1,111 @@

+"""
+Cross-encoder re-ranking for RAG search results.
+Uses cross-encoder/ms-marco-MiniLM-L-6-v2 for fast, accurate re-ranking
+of vector search results to improve retrieval accuracy.
+"""
+from __future__ import annotations
+from functools import lru_cache
+from typing import List, Dict, Any, Optional
+try:
+    from sentence_transformers import CrossEncoder
+except ImportError:
+    CrossEncoder = None  # type: ignore
+@lru_cache(maxsize=1)
+def _get_reranker() -> Optional[Any]:
+    """
+    Return the shared cross-encoder instance, building it on first use.
+    The result is memoised by ``lru_cache``, so the model is constructed at
+    most once per process; a ``None`` result is memoised the same way.
+    Returns:
+        A ``CrossEncoder`` for cross-encoder/ms-marco-MiniLM-L-6-v2, or None
+        when sentence-transformers could not be imported or the model
+        failed to load.
+    """
+    model_name = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    # CrossEncoder is None when the optional import at module top failed.
+    if CrossEncoder is not None:
+        try:
+            return CrossEncoder(model_name)
+        except Exception as load_error:
+            # Best effort: a broken model must not take RAG search down.
+            print(f"Warning: Failed to load cross-encoder model: {load_error}")
+            print("RAG search will continue without re-ranking.")
+    return None
+def rerank_results(
+    query: str,
+    candidates: List[Dict[str, Any]],
+    top_k: Optional[int] = None,
+) -> List[Dict[str, Any]]:
+    """
+    Re-rank search results using cross-encoder for improved accuracy.
+    Args:
+        query: The search query
+        candidates: List of candidate results, each with at least a "text" field
+        top_k: Optional limit on number of results to return after re-ranking;
+            ignored unless it is a positive integer
+    Returns:
+        New list of candidate copies sorted by descending "score", each with
+        "score" and "relevance" set to the sigmoid of the cross-encoder logit
+        and "reranked" set to True. The input dicts are not mutated.
+        If the cross-encoder is unavailable or scoring fails, the original
+        ``candidates`` list is returned unchanged (not truncated, no
+        "reranked" flag).
+    """
+    # Local import keeps this function self-contained; math is stdlib.
+    import math
+    if not candidates:
+        return []
+    reranker = _get_reranker()
+    # If cross-encoder is not available, return original results
+    if reranker is None:
+        return candidates
+    try:
+        # Prepare pairs: (query, candidate_text). A missing or None text is
+        # scored as an empty string instead of being handed to the model as None.
+        pairs = [(query, candidate.get("text") or "") for candidate in candidates]
+        # Raw scores are logits: higher = more relevant, may be negative.
+        raw_scores = reranker.predict(pairs)
+        reranked = []
+        for candidate, raw_score in zip(candidates, raw_scores):
+            logit = float(raw_score)
+            # Numerically stable sigmoid: never exponentiate a large positive
+            # number, so extreme logits cannot overflow.
+            if math.isnan(logit):
+                # NaN would make the sort order undefined; rank it last.
+                normalized_score = 0.0
+            elif logit >= 0:
+                normalized_score = 1.0 / (1.0 + math.exp(-logit))
+            else:
+                exp_logit = math.exp(logit)
+                normalized_score = exp_logit / (1.0 + exp_logit)
+            reranked.append(
+                {
+                    **candidate,
+                    "score": normalized_score,
+                    "relevance": normalized_score,  # Keep both for compatibility
+                    "reranked": True,  # Flag to indicate this was re-ranked
+                }
+            )
+        # Sort by re-ranked score (descending); the sort is stable, so ties
+        # keep their incoming (vector-search) order.
+        reranked.sort(key=lambda item: item["score"], reverse=True)
+        # Return top_k if specified
+        if top_k is not None and top_k > 0:
+            reranked = reranked[:top_k]
+        return reranked
+    except Exception as e:
+        # Best effort: any scoring failure falls back to the vector-search order.
+        print(f"Warning: Cross-encoder re-ranking failed: {e}")
+        print("Returning original results without re-ranking.")
+        return candidates

backend/mcp_server/rag/search.py CHANGED Viewed

@@ -6,6 +6,7 @@ from typing import Any, Mapping
 from backend.mcp_server.common.database import search_vectors
 from backend.mcp_server.common.embeddings import embed_text
 from backend.mcp_server.common.logging import log_rag_search_metrics
 from backend.mcp_server.common.tenant import TenantContext
 from backend.mcp_server.common.utils import ToolValidationError, tool_handler
@@ -33,32 +34,70 @@ async def rag_search(context: TenantContext, payload: Mapping[str, Any]) -> dict
         raise ToolValidationError("threshold must be a float between 0.0 and 1.0")
     embedding = embed_text(query)
-    raw_results = search_vectors(context.tenant_id, embedding, limit=limit_value)
-    # Return top results even if slightly below threshold, but prioritize high-scoring ones
     filtered = []
-    for chunk in raw_results:
-        similarity = chunk.get("similarity", 0.0)
         if similarity >= threshold_value:
             filtered.append({
                 "text": chunk.get("text", ""),
                 "relevance": similarity,
                 "score": similarity  # Add score field for compatibility
             })
-    # If we have results above threshold, return top 3. Otherwise, return top 1 even if below threshold.
     if filtered:
-        filtered = sorted(filtered, key=lambda x: x.get("relevance", 0.0), reverse=True)[:3]
-    elif raw_results:
         # Return the top result even if below threshold, as it might still be relevant
-        top_chunk = raw_results[0]
         filtered = [{
             "text": top_chunk.get("text", ""),
-            "relevance": top_chunk.get("similarity", 0.0),
-            "score": top_chunk.get("similarity", 0.0)
         }]
-    hits = len(raw_results)
-    avg_score = mean([item.get("similarity", 0.0) for item in raw_results]) if raw_results else None
-    top_score = raw_results[0].get("similarity") if raw_results else None
     log_rag_search_metrics(
         tenant_id=context.tenant_id,
@@ -74,7 +113,8 @@ async def rag_search(context: TenantContext, payload: Mapping[str, Any]) -> dict
         "metadata": {
             "limit": limit_value,
             "threshold": threshold_value,
-            "hits_before_filter": hits,
         },
     }

 from backend.mcp_server.common.database import search_vectors
 from backend.mcp_server.common.embeddings import embed_text
 from backend.mcp_server.common.logging import log_rag_search_metrics
+from backend.mcp_server.common.reranker import rerank_results
 from backend.mcp_server.common.tenant import TenantContext
 from backend.mcp_server.common.utils import ToolValidationError, tool_handler
         raise ToolValidationError("threshold must be a float between 0.0 and 1.0")
     embedding = embed_text(query)
+    # Step 1: Get top 10 candidates from vector search for re-ranking
+    # We fetch more candidates than requested to allow cross-encoder to find the best matches
+    rerank_candidates_count = max(10, limit_value * 2)  # Get at least 10, or 2x the requested limit
+    raw_results = search_vectors(context.tenant_id, embedding, limit=rerank_candidates_count)
+    # Step 2: Re-rank candidates using cross-encoder for improved accuracy
+    # Re-rank up to top 10 candidates (or all if fewer than 10)
+    candidates_for_rerank = raw_results[:10]  # Re-rank top 10 (or all available)
+    reranked_results = None
+    if candidates_for_rerank:
+        # Prepare candidates with text and initial similarity score
+        candidates = [
+            {
+                "text": chunk.get("text", ""),
+                "relevance": chunk.get("similarity", 0.0),
+                "score": chunk.get("similarity", 0.0),
+            }
+            for chunk in candidates_for_rerank
+        ]
+        # Re-rank using cross-encoder (returns top_k results already sorted)
+        reranked = rerank_results(query, candidates, top_k=limit_value)
+        if reranked:
+            reranked_results = reranked
+    # Step 3: Use re-ranked results if available, otherwise use original vector search results
+    results_to_filter = reranked_results if reranked_results else raw_results
+    # Step 4: Filter by threshold and return top results
     filtered = []
+    for chunk in results_to_filter:
+        # Re-ranked results have "score" and "relevance", original have "similarity"
+        similarity = chunk.get("similarity") or chunk.get("score") or chunk.get("relevance") or 0.0
         if similarity >= threshold_value:
             filtered.append({
                 "text": chunk.get("text", ""),
                 "relevance": similarity,
                 "score": similarity  # Add score field for compatibility
             })
+    # If we have results above threshold, return top results. Otherwise, return top 1 even if below threshold.
     if filtered:
+        filtered = sorted(filtered, key=lambda x: x.get("relevance", 0.0), reverse=True)[:limit_value]
+    elif results_to_filter:
         # Return the top result even if below threshold, as it might still be relevant
+        top_chunk = results_to_filter[0]
+        similarity = top_chunk.get("similarity") or top_chunk.get("score") or top_chunk.get("relevance") or 0.0
         filtered = [{
             "text": top_chunk.get("text", ""),
+            "relevance": similarity,
+            "score": similarity
         }]
+    # Calculate metrics from the results we're using (re-ranked or original)
+    hits = len(results_to_filter)
+    scores_for_metrics = [
+        item.get("similarity") or item.get("score") or item.get("relevance") or 0.0
+        for item in results_to_filter
+    ]
+    avg_score = mean(scores_for_metrics) if scores_for_metrics else None
+    top_score = scores_for_metrics[0] if scores_for_metrics else None
     log_rag_search_metrics(
         tenant_id=context.tenant_id,
         "metadata": {
             "limit": limit_value,
             "threshold": threshold_value,
+            "hits_before_filter": len(raw_results),
+            "reranked": reranked_results is not None,
         },
     }

frontend/app/admin-rules/page.tsx CHANGED Viewed

@@ -52,44 +52,6 @@ export default function AdminRulesPage() {
   const [lastUpdated, setLastUpdated] = useState<string>("");
   const fileInputRef = useRef<HTMLInputElement>(null);
-  // Check permissions early
-  if (!canManageRules(role)) {
-    return (
-      <main className="mx-auto flex min-h-screen max-w-5xl flex-col gap-10 px-4 pb-16 pt-12 sm:px-6 lg:px-8">
-        <header className="flex flex-col gap-4 rounded-2xl border border-white/10 bg-white/5 px-6 py-6 text-slate-100 shadow-lg shadow-slate-950/40">
-          <div className="flex items-center justify-between gap-3">
-            <div className="flex items-center gap-3 text-base font-semibold">
-              <span className="inline-flex h-10 w-10 items-center justify-center rounded-2xl bg-gradient-to-br from-sky-400 to-cyan-500 text-slate-950">
-                IC
-              </span>
-              IntegraChat · Admin Rules
-            </div>
-            <div className="flex items-center gap-4">
-              <TenantSelector />
-              <Link href="/" className="text-xs font-semibold uppercase tracking-[0.3em] text-cyan-300 hover:text-white">
-                ← Back Home
-              </Link>
-            </div>
-          </div>
-        </header>
-        <div className="rounded-2xl border border-red-500/50 bg-red-500/10 p-8 text-center">
-          <h2 className="text-2xl font-bold text-red-300 mb-2">Access Denied</h2>
-          <p className="text-slate-300 mb-4">
-            You need <strong>Admin</strong> or <strong>Owner</strong> role to manage rules.
-          </p>
-          <p className="text-sm text-slate-400">
-            Your current role: <strong className="text-slate-200">{role.charAt(0).toUpperCase() + role.slice(1)}</strong>
-          </p>
-          <p className="text-sm text-slate-400 mt-2">
-            Please switch your role using the dropdown in the header.
-          </p>
-        </div>
-        <Footer />
-      </main>
-    );
-  }
   // Set initial time only on client side to avoid hydration mismatch
   useEffect(() => {
     setLastUpdated(new Date().toLocaleTimeString());
@@ -316,6 +278,44 @@ export default function AdminRulesPage() {
     }
   }, [deleteInput, handleRefresh, headers, requireTenant]);
   return (
     <main className="mx-auto flex min-h-screen max-w-5xl flex-col gap-10 px-4 pb-16 pt-12 sm:px-6 lg:px-8">
       <header className="flex flex-col gap-4 rounded-2xl border border-white/10 bg-white/5 px-6 py-6 text-slate-100 shadow-lg shadow-slate-950/40">

   const [lastUpdated, setLastUpdated] = useState<string>("");
   const fileInputRef = useRef<HTMLInputElement>(null);
   // Set initial time only on client side to avoid hydration mismatch
   useEffect(() => {
     setLastUpdated(new Date().toLocaleTimeString());
     }
   }, [deleteInput, handleRefresh, headers, requireTenant]);
+  // Check permissions AFTER all hooks are called
+  if (!canManageRules(role)) {
+    return (
+      <main className="mx-auto flex min-h-screen max-w-5xl flex-col gap-10 px-4 pb-16 pt-12 sm:px-6 lg:px-8">
+        <header className="flex flex-col gap-4 rounded-2xl border border-white/10 bg-white/5 px-6 py-6 text-slate-100 shadow-lg shadow-slate-950/40">
+          <div className="flex items-center justify-between gap-3">
+            <div className="flex items-center gap-3 text-base font-semibold">
+              <span className="inline-flex h-10 w-10 items-center justify-center rounded-2xl bg-gradient-to-br from-sky-400 to-cyan-500 text-slate-950">
+                IC
+              </span>
+              IntegraChat · Admin Rules
+            </div>
+            <div className="flex items-center gap-4">
+              <TenantSelector />
+              <Link href="/" className="text-xs font-semibold uppercase tracking-[0.3em] text-cyan-300 hover:text-white">
+                ← Back Home
+              </Link>
+            </div>
+          </div>
+        </header>
+        <div className="rounded-2xl border border-red-500/50 bg-red-500/10 p-8 text-center">
+          <h2 className="text-2xl font-bold text-red-300 mb-2">Access Denied</h2>
+          <p className="text-slate-300 mb-4">
+            You need <strong>Admin</strong> or <strong>Owner</strong> role to manage rules.
+          </p>
+          <p className="text-sm text-slate-400">
+            Your current role: <strong className="text-slate-200">{role.charAt(0).toUpperCase() + role.slice(1)}</strong>
+          </p>
+          <p className="text-sm text-slate-400 mt-2">
+            Please switch your role using the dropdown in the header.
+          </p>
+        </div>
+        <Footer />
+      </main>
+    );
+  }
   return (
     <main className="mx-auto flex min-h-screen max-w-5xl flex-col gap-10 px-4 pb-16 pt-12 sm:px-6 lg:px-8">
       <header className="flex flex-col gap-4 rounded-2xl border border-white/10 bg-white/5 px-6 py-6 text-slate-100 shadow-lg shadow-slate-950/40">

frontend/components/knowledge-base-panel.tsx CHANGED Viewed

@@ -20,7 +20,7 @@ type Document = {
 type SourceType = "raw_text" | "url" | "pdf" | "docx" | "txt" | "markdown";
 const API_BASE =
-  process.env.NEXT_PUBLIC_API_URL?.replace(/\/$/, "") || "http://localhost:8000";
 export function KnowledgeBasePanel() {
   const { tenantId, isLoading: tenantLoading, role } = useTenant();
@@ -242,7 +242,7 @@ export function KnowledgeBasePanel() {
           setDocuments([]);
           return;
         } else if (response.status === 503) {
-          console.error("Cannot connect to RAG MCP server");
           setDocuments([]);
           return;
         } else {
@@ -253,8 +253,15 @@ export function KnowledgeBasePanel() {
       const data = await response.json();
       setDocuments(data.documents || []);
     } catch (err) {
-      console.error(err);
-      setDocuments([]);
       // Don't show error in status for document loading - it's not critical
     } finally {
       setIsLoadingDocs(false);
@@ -338,7 +345,8 @@ export function KnowledgeBasePanel() {
     if (!tenantLoading && tenantId && tenantId.trim()) {
       loadDocuments();
     }
-  }, [tenantId, tenantLoading]);
   return (
     <section

 type SourceType = "raw_text" | "url" | "pdf" | "docx" | "txt" | "markdown";
 const API_BASE =
+  process.env.NEXT_PUBLIC_BACKEND_BASE_URL?.replace(/\/$/, "") || "http://localhost:8000";
 export function KnowledgeBasePanel() {
   const { tenantId, isLoading: tenantLoading, role } = useTenant();
           setDocuments([]);
           return;
         } else if (response.status === 503) {
+          console.warn("Cannot connect to RAG MCP server");
           setDocuments([]);
           return;
         } else {
       const data = await response.json();
       setDocuments(data.documents || []);
     } catch (err) {
+      // Handle network errors (e.g., backend not running, CORS, etc.)
+      if (err instanceof TypeError && err.message === "Failed to fetch") {
+        // Network error - backend likely not running or unreachable
+        console.warn("Cannot connect to backend. Make sure the backend server is running.");
+        setDocuments([]);
+      } else {
+        console.error("Error loading documents:", err);
+        setDocuments([]);
+      }
       // Don't show error in status for document loading - it's not critical
     } finally {
       setIsLoadingDocs(false);
     if (!tenantLoading && tenantId && tenantId.trim()) {
       loadDocuments();
     }
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [tenantId, tenantLoading, role]);
   return (
     <section

test_reranking.py ADDED Viewed

	@@ -0,0 +1,197 @@

+"""
+Test script for cross-encoder re-ranking in RAG search.
+This script tests:
+1. Model loading
+2. Re-ranking functionality
+3. Comparison of results with/without re-ranking
+"""
+import sys
+import asyncio
+from pathlib import Path
+# Add backend to path
+backend_dir = Path(__file__).parent / "backend"
+sys.path.insert(0, str(backend_dir))
+from mcp_server.common.reranker import rerank_results, _get_reranker
+def test_model_loading():
+    """Check that the cross-encoder can be obtained; return True on success, False otherwise."""
+    banner = "=" * 60
+    print(banner)
+    print("Test 1: Model Loading")
+    print(banner)
+    try:
+        model = _get_reranker()
+        # None means the loader gave up (missing dependency or load failure).
+        if model is not None:
+            print("✅ SUCCESS: Cross-encoder model loaded successfully")
+            print(f"   Model type: {type(model).__name__}")
+            return True
+        print("❌ FAILED: Reranker model is None (sentence-transformers not available?)")
+        return False
+    except Exception as e:
+        print(f"❌ FAILED: Error loading model: {e}")
+        return False
+def test_reranking_basic():
+    """Re-rank four fixed candidates and verify ordering and the 'reranked' flag.
+    Returns True when the top-3 result is non-empty, sorted by descending
+    score and every item is flagged as re-ranked; False otherwise.
+    """
+    def show(items):
+        # One line per candidate: rank, score, first 60 characters of text.
+        for position, item in enumerate(items, 1):
+            print(f"  {position}. Score: {item['score']:.3f} - {item['text'][:60]}...")
+    print("\n" + "=" * 60)
+    print("Test 2: Basic Re-ranking")
+    print("=" * 60)
+    query = "What is the refund policy?"
+    samples = (
+        ("Our refund policy allows returns within 30 days.", 0.85),
+        ("The company was founded in 2020.", 0.45),
+        ("Refunds are processed within 5-7 business days after approval.", 0.72),
+        ("Contact support for assistance.", 0.30),
+    )
+    candidates = [
+        {"text": text, "score": similarity, "relevance": similarity}
+        for text, similarity in samples
+    ]
+    print(f"Query: {query}")
+    print("\nOriginal order (by vector similarity):")
+    show(candidates)
+    try:
+        ranked = rerank_results(query, candidates, top_k=3)
+        if not ranked:
+            print("❌ FAILED: Re-ranking returned empty results")
+            return False
+        print("\nRe-ranked order (by cross-encoder):")
+        show(ranked)
+        # Scores must already be in descending order.
+        observed = [item.get("score", 0.0) for item in ranked]
+        expected = sorted(observed, reverse=True)
+        if observed != expected:
+            print("❌ FAILED: Results are not sorted by score")
+            return False
+        # Every returned item must carry the re-ranked marker.
+        if any(item.get("reranked") is not True for item in ranked):
+            print("❌ FAILED: 'reranked' flag not set")
+            return False
+        print("✅ SUCCESS: Re-ranking works correctly")
+        return True
+    except Exception as e:
+        print(f"❌ FAILED: Error during re-ranking: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+def test_reranking_empty():
+    """Verify that re-ranking an empty candidate list yields an empty list."""
+    print("\n" + "=" * 60)
+    print("Test 3: Empty Candidates Handling")
+    print("=" * 60)
+    try:
+        outcome = rerank_results("test query", [])
+        # Guard clause: anything other than [] is a failure.
+        if outcome != []:
+            print(f"❌ FAILED: Expected empty list, got {outcome}")
+            return False
+        print("✅ SUCCESS: Empty candidates handled correctly")
+        return True
+    except Exception as e:
+        print(f"❌ FAILED: Error with empty candidates: {e}")
+        return False
+async def test_rag_search_integration():
+    """Run one RAG search end to end and report whether re-ranking was applied.
+    Returns True when the search call completes (whether or not re-ranking
+    was applied), or None when anything in the attempt raises, which is
+    reported as a skip.
+    """
+    print("\n" + "=" * 60)
+    print("Test 4: RAG Search Integration (requires database)")
+    print("=" * 60)
+    try:
+        # Imported lazily so a missing backend only skips this test.
+        from mcp_server.rag.search import rag_search
+        from mcp_server.common.tenant import TenantContext
+        tenant = TenantContext(tenant_id="test_tenant_rerank")
+        request = {"query": "test query", "limit": 5, "threshold": 0.1}
+        print(f"Testing RAG search with query: '{request['query']}'")
+        print("Note: This requires a running database with documents.")
+        response = await rag_search(tenant, request)
+        print(f"\nResults: {len(response.get('results', []))} items")
+        print(f"Metadata: {response.get('metadata', {})}")
+        was_reranked = response.get('metadata', {}).get('reranked')
+        if not was_reranked:
+            print("⚠️  WARNING: Re-ranking was not applied (may be normal if no candidates found)")
+        else:
+            print("✅ SUCCESS: Re-ranking was applied")
+        return True
+    except Exception as e:
+        print(f"⚠️  SKIPPED: Integration test requires database: {e}")
+        return None
+def main():
+    """Run all tests and print a summary.
+    Each test contributes True (pass), False (fail) or None (skipped).
+    Skipped tests are listed in the summary but do not count towards the
+    pass total.
+    Returns:
+        Process exit code: 0 when no test failed, 1 otherwise.
+    """
+    print("\n" + "=" * 60)
+    print("Cross-Encoder Re-ranking Test Suite")
+    print("=" * 60)
+    results = [
+        ("Model Loading", test_model_loading()),
+        ("Basic Re-ranking", test_reranking_basic()),
+        ("Empty Candidates", test_reranking_empty()),
+    ]
+    # Integration test is optional (requires DB). It returns None when it
+    # skips itself; record that so the summary shows the skip.
+    try:
+        integration_result = asyncio.run(test_rag_search_integration())
+    except Exception as e:
+        print(f"⚠️  Integration test skipped: {e}")
+        integration_result = None
+    results.append(("RAG Integration", integration_result))
+    # Summary
+    print("\n" + "=" * 60)
+    print("Test Summary")
+    print("=" * 60)
+    passed = sum(1 for _, result in results if result is True)
+    failed = sum(1 for _, result in results if result is False)
+    skipped = len(results) - passed - failed
+    total = passed + failed  # skipped tests are neither passes nor failures
+    for test_name, result in results:
+        if result is True:
+            status = "✅ PASS"
+        elif result is False:
+            status = "❌ FAIL"
+        else:
+            status = "⚠️  SKIP"
+        print(f"{status}: {test_name}")
+    print(f"\nTotal: {passed}/{total} tests passed")
+    if skipped:
+        print(f"Skipped: {skipped}")
+    if failed == 0:
+        print("\n🎉 All tests passed!")
+    else:
+        print("\n⚠️  Some tests failed. Check output above for details.")
+    # Non-zero exit lets shells and CI detect failures.
+    return 1 if failed else 0
+if __name__ == "__main__":
+    sys.exit(main())