Spaces:

galcan
/

mcp-docs-server

Running

App Files Files Community

galcan committed on Oct 18, 2025

Commit

8b215ec

1 Parent(s): 587ea32

Remove FAISS binary file, use text-based search instead

Browse files

Files changed (4) hide show

.gitignore +4 -0
app.py +20 -16
requirements.txt +0 -2
test_app.py +2 -9

.gitignore CHANGED Viewed

@@ -41,3 +41,7 @@ Thumbs.db
 # Logs
 *.log

 # Logs
 *.log
+# FAISS index files
+*.index
+mcp_docs/index/faiss_md.index

app.py CHANGED Viewed

@@ -4,8 +4,6 @@ from typing import List, Dict, Any, Optional
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-import faiss
-import numpy as np
 app = FastAPI(title="MCP Documentation Server", version="1.0.0")
@@ -21,7 +19,6 @@ app.add_middleware(
 # Global variables for loaded data
 chunks_data = None
 docs_data = None
-faiss_index = None
 class SearchRequest(BaseModel):
     query: str
@@ -32,8 +29,8 @@ class SearchResponse(BaseModel):
     total: int
 def load_data():
-    """Load the embedded chunks and FAISS index"""
-    global chunks_data, docs_data, faiss_index
     try:
         # Load chunks data
@@ -44,10 +41,8 @@ def load_data():
         with open('mcp_docs/index/docs_md.json', 'r', encoding='utf-8') as f:
             docs_data = json.load(f)
-        # Load FAISS index
-        faiss_index = faiss.read_index('mcp_docs/index/faiss_md.index')
         print(f"Loaded {len(chunks_data)} chunks and {len(docs_data)} documents")
     except Exception as e:
         print(f"Error loading data: {e}")
@@ -70,18 +65,26 @@ async def root():
 @app.post("/search", response_model=SearchResponse)
 async def search_docs(request: SearchRequest):
-    """Search through documentation chunks"""
-    if not chunks_data or faiss_index is None:
         raise HTTPException(status_code=500, detail="Data not loaded")
     try:
-        # For now, return a simple text search
-        # In a real implementation, you'd use the FAISS index for semantic search
         query_lower = request.query.lower()
         results = []
         for chunk in chunks_data:
-            if query_lower in chunk.get('text', '').lower():
                 results.append({
                     "chunk_id": chunk.get('chunk_id'),
                     "title": chunk.get('title'),
@@ -89,11 +92,12 @@ async def search_docs(request: SearchRequest):
                     "url": chunk.get('url'),
                     "filename": chunk.get('filename'),
                     "chunk_index": chunk.get('chunk_index'),
-                    "total_chunks": chunk.get('total_chunks')
                 })
-        # Sort by relevance (simple implementation)
-        results = sorted(results, key=lambda x: len(x['text']), reverse=True)
         return SearchResponse(
             results=results[:request.limit],

 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 app = FastAPI(title="MCP Documentation Server", version="1.0.0")
 # Global variables for loaded data
 chunks_data = None
 docs_data = None
 class SearchRequest(BaseModel):
     query: str
     total: int
 def load_data():
+    """Load the embedded chunks data"""
+    global chunks_data, docs_data
     try:
         # Load chunks data
         with open('mcp_docs/index/docs_md.json', 'r', encoding='utf-8') as f:
             docs_data = json.load(f)
         print(f"Loaded {len(chunks_data)} chunks and {len(docs_data)} documents")
+        print("Using text-based search (no FAISS index required)")
     except Exception as e:
         print(f"Error loading data: {e}")
 @app.post("/search", response_model=SearchResponse)
 async def search_docs(request: SearchRequest):
+    """Search through documentation chunks using text matching"""
+    if not chunks_data:
         raise HTTPException(status_code=500, detail="Data not loaded")
     try:
         query_lower = request.query.lower()
         results = []
         for chunk in chunks_data:
+            text = chunk.get('text', '').lower()
+            title = chunk.get('title', '').lower()
+            # Simple scoring based on query matches
+            score = 0
+            if query_lower in text:
+                score += text.count(query_lower) * 2  # Text matches worth more
+            if query_lower in title:
+                score += title.count(query_lower) * 5  # Title matches worth most
+            if score > 0:
                 results.append({
                     "chunk_id": chunk.get('chunk_id'),
                     "title": chunk.get('title'),
                     "url": chunk.get('url'),
                     "filename": chunk.get('filename'),
                     "chunk_index": chunk.get('chunk_index'),
+                    "total_chunks": chunk.get('total_chunks'),
+                    "score": score
                 })
+        # Sort by relevance score
+        results = sorted(results, key=lambda x: x['score'], reverse=True)
         return SearchResponse(
             results=results[:request.limit],

requirements.txt CHANGED Viewed

@@ -1,6 +1,4 @@
 fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.5.0
-faiss-cpu==1.7.4
-numpy==1.24.3
 python-multipart==0.0.6

 fastapi==0.104.1
 uvicorn==0.24.0
 pydantic==2.5.0
 python-multipart==0.0.6

test_app.py CHANGED Viewed

@@ -22,15 +22,8 @@ def test_data_loading():
             docs = json.load(f)
         print(f"[OK] Loaded {len(docs)} documents")
-        # Test FAISS index (if available)
-        try:
-            import faiss
-            index = faiss.read_index('mcp_docs/index/faiss_md.index')
-            print(f"[OK] Loaded FAISS index with {index.ntotal} vectors")
-        except ImportError:
-            print("[WARN] FAISS not available (will be installed in Docker)")
-        except Exception as e:
-            print(f"[WARN] FAISS index issue: {e}")
         print("\n[SUCCESS] All data files loaded successfully!")
         return True

             docs = json.load(f)
         print(f"[OK] Loaded {len(docs)} documents")
+        # Note: FAISS index is no longer required for text-based search
+        print("[INFO] Using text-based search (no FAISS index required)")
         print("\n[SUCCESS] All data files loaded successfully!")
         return True