Spaces:
Running
Running
Remove FAISS binary file, use text-based search instead
Browse files
- .gitignore +4 -0
- app.py +20 -16
- requirements.txt +0 -2
- test_app.py +2 -9
.gitignore
CHANGED
|
@@ -41,3 +41,7 @@ Thumbs.db
|
|
| 41 |
|
| 42 |
# Logs
|
| 43 |
*.log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Logs
|
| 43 |
*.log
|
| 44 |
+
|
| 45 |
+
# FAISS index files
|
| 46 |
+
*.index
|
| 47 |
+
mcp_docs/index/faiss_md.index
|
app.py
CHANGED
|
@@ -4,8 +4,6 @@ from typing import List, Dict, Any, Optional
|
|
| 4 |
from fastapi import FastAPI, HTTPException
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
from pydantic import BaseModel
|
| 7 |
-
import faiss
|
| 8 |
-
import numpy as np
|
| 9 |
|
| 10 |
app = FastAPI(title="MCP Documentation Server", version="1.0.0")
|
| 11 |
|
|
@@ -21,7 +19,6 @@ app.add_middleware(
|
|
| 21 |
# Global variables for loaded data
|
| 22 |
chunks_data = None
|
| 23 |
docs_data = None
|
| 24 |
-
faiss_index = None
|
| 25 |
|
| 26 |
class SearchRequest(BaseModel):
|
| 27 |
query: str
|
|
@@ -32,8 +29,8 @@ class SearchResponse(BaseModel):
|
|
| 32 |
total: int
|
| 33 |
|
| 34 |
def load_data():
|
| 35 |
-
"""Load the embedded chunks and FAISS index"""
|
| 36 |
-
global chunks_data, docs_data, faiss_index
|
| 37 |
|
| 38 |
try:
|
| 39 |
# Load chunks data
|
|
@@ -44,10 +41,8 @@ def load_data():
|
|
| 44 |
with open('mcp_docs/index/docs_md.json', 'r', encoding='utf-8') as f:
|
| 45 |
docs_data = json.load(f)
|
| 46 |
|
| 47 |
-
# Load FAISS index
|
| 48 |
-
faiss_index = faiss.read_index('mcp_docs/index/faiss_md.index')
|
| 49 |
-
|
| 50 |
print(f"Loaded {len(chunks_data)} chunks and {len(docs_data)} documents")
|
|
|
|
| 51 |
|
| 52 |
except Exception as e:
|
| 53 |
print(f"Error loading data: {e}")
|
|
@@ -70,18 +65,26 @@ async def root():
|
|
| 70 |
|
| 71 |
@app.post("/search", response_model=SearchResponse)
|
| 72 |
async def search_docs(request: SearchRequest):
|
| 73 |
-
"""Search through documentation chunks"""
|
| 74 |
-
if not chunks_data or not faiss_index:
|
| 75 |
raise HTTPException(status_code=500, detail="Data not loaded")
|
| 76 |
|
| 77 |
try:
|
| 78 |
-
# For now, return a simple text search
|
| 79 |
-
# In a real implementation, you'd use the FAISS index for semantic search
|
| 80 |
query_lower = request.query.lower()
|
| 81 |
results = []
|
| 82 |
|
| 83 |
for chunk in chunks_data:
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
results.append({
|
| 86 |
"chunk_id": chunk.get('chunk_id'),
|
| 87 |
"title": chunk.get('title'),
|
|
@@ -89,11 +92,12 @@ async def search_docs(request: SearchRequest):
|
|
| 89 |
"url": chunk.get('url'),
|
| 90 |
"filename": chunk.get('filename'),
|
| 91 |
"chunk_index": chunk.get('chunk_index'),
|
| 92 |
-
"total_chunks": chunk.get('total_chunks')
|
|
|
|
| 93 |
})
|
| 94 |
|
| 95 |
-
# Sort by relevance
|
| 96 |
-
results = sorted(results, key=lambda x:
|
| 97 |
|
| 98 |
return SearchResponse(
|
| 99 |
results=results[:request.limit],
|
|
|
|
| 4 |
from fastapi import FastAPI, HTTPException
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
from pydantic import BaseModel
|
|
|
|
|
|
|
| 7 |
|
| 8 |
app = FastAPI(title="MCP Documentation Server", version="1.0.0")
|
| 9 |
|
|
|
|
| 19 |
# Global variables for loaded data
|
| 20 |
chunks_data = None
|
| 21 |
docs_data = None
|
|
|
|
| 22 |
|
| 23 |
class SearchRequest(BaseModel):
|
| 24 |
query: str
|
|
|
|
| 29 |
total: int
|
| 30 |
|
| 31 |
def load_data():
|
| 32 |
+
"""Load the embedded chunks data"""
|
| 33 |
+
global chunks_data, docs_data
|
| 34 |
|
| 35 |
try:
|
| 36 |
# Load chunks data
|
|
|
|
| 41 |
with open('mcp_docs/index/docs_md.json', 'r', encoding='utf-8') as f:
|
| 42 |
docs_data = json.load(f)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
| 44 |
print(f"Loaded {len(chunks_data)} chunks and {len(docs_data)} documents")
|
| 45 |
+
print("Using text-based search (no FAISS index required)")
|
| 46 |
|
| 47 |
except Exception as e:
|
| 48 |
print(f"Error loading data: {e}")
|
|
|
|
| 65 |
|
| 66 |
@app.post("/search", response_model=SearchResponse)
|
| 67 |
async def search_docs(request: SearchRequest):
|
| 68 |
+
"""Search through documentation chunks using text matching"""
|
| 69 |
+
if not chunks_data:
|
| 70 |
raise HTTPException(status_code=500, detail="Data not loaded")
|
| 71 |
|
| 72 |
try:
|
|
|
|
|
|
|
| 73 |
query_lower = request.query.lower()
|
| 74 |
results = []
|
| 75 |
|
| 76 |
for chunk in chunks_data:
|
| 77 |
+
text = chunk.get('text', '').lower()
|
| 78 |
+
title = chunk.get('title', '').lower()
|
| 79 |
+
|
| 80 |
+
# Simple scoring based on query matches
|
| 81 |
+
score = 0
|
| 82 |
+
if query_lower in text:
|
| 83 |
+
score += text.count(query_lower) * 2 # Text matches worth more
|
| 84 |
+
if query_lower in title:
|
| 85 |
+
score += title.count(query_lower) * 5 # Title matches worth most
|
| 86 |
+
|
| 87 |
+
if score > 0:
|
| 88 |
results.append({
|
| 89 |
"chunk_id": chunk.get('chunk_id'),
|
| 90 |
"title": chunk.get('title'),
|
|
|
|
| 92 |
"url": chunk.get('url'),
|
| 93 |
"filename": chunk.get('filename'),
|
| 94 |
"chunk_index": chunk.get('chunk_index'),
|
| 95 |
+
"total_chunks": chunk.get('total_chunks'),
|
| 96 |
+
"score": score
|
| 97 |
})
|
| 98 |
|
| 99 |
+
# Sort by relevance score
|
| 100 |
+
results = sorted(results, key=lambda x: x['score'], reverse=True)
|
| 101 |
|
| 102 |
return SearchResponse(
|
| 103 |
results=results[:request.limit],
|
requirements.txt
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
fastapi==0.104.1
|
| 2 |
uvicorn==0.24.0
|
| 3 |
pydantic==2.5.0
|
| 4 |
-
faiss-cpu==1.7.4
|
| 5 |
-
numpy==1.24.3
|
| 6 |
python-multipart==0.0.6
|
|
|
|
| 1 |
fastapi==0.104.1
|
| 2 |
uvicorn==0.24.0
|
| 3 |
pydantic==2.5.0
|
|
|
|
|
|
|
| 4 |
python-multipart==0.0.6
|
test_app.py
CHANGED
|
@@ -22,15 +22,8 @@ def test_data_loading():
|
|
| 22 |
docs = json.load(f)
|
| 23 |
print(f"[OK] Loaded {len(docs)} documents")
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
|
| 27 |
-
import faiss
|
| 28 |
-
index = faiss.read_index('mcp_docs/index/faiss_md.index')
|
| 29 |
-
print(f"[OK] Loaded FAISS index with {index.ntotal} vectors")
|
| 30 |
-
except ImportError:
|
| 31 |
-
print("[WARN] FAISS not available (will be installed in Docker)")
|
| 32 |
-
except Exception as e:
|
| 33 |
-
print(f"[WARN] FAISS index issue: {e}")
|
| 34 |
|
| 35 |
print("\n[SUCCESS] All data files loaded successfully!")
|
| 36 |
return True
|
|
|
|
| 22 |
docs = json.load(f)
|
| 23 |
print(f"[OK] Loaded {len(docs)} documents")
|
| 24 |
|
| 25 |
+
# Note: FAISS index is no longer required for text-based search
|
| 26 |
+
print("[INFO] Using text-based search (no FAISS index required)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
print("\n[SUCCESS] All data files loaded successfully!")
|
| 29 |
return True
|