galcan committed on
Commit
8b215ec
·
1 Parent(s): 587ea32

Remove FAISS binary file, use text-based search instead

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. app.py +20 -16
  3. requirements.txt +0 -2
  4. test_app.py +2 -9
.gitignore CHANGED
@@ -41,3 +41,7 @@ Thumbs.db
41
 
42
  # Logs
43
  *.log
 
 
 
 
 
41
 
42
  # Logs
43
  *.log
44
+
45
+ # FAISS index files
46
+ *.index
47
+ mcp_docs/index/faiss_md.index
app.py CHANGED
@@ -4,8 +4,6 @@ from typing import List, Dict, Any, Optional
4
  from fastapi import FastAPI, HTTPException
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from pydantic import BaseModel
7
- import faiss
8
- import numpy as np
9
 
10
  app = FastAPI(title="MCP Documentation Server", version="1.0.0")
11
 
@@ -21,7 +19,6 @@ app.add_middleware(
21
  # Global variables for loaded data
22
  chunks_data = None
23
  docs_data = None
24
- faiss_index = None
25
 
26
  class SearchRequest(BaseModel):
27
  query: str
@@ -32,8 +29,8 @@ class SearchResponse(BaseModel):
32
  total: int
33
 
34
  def load_data():
35
- """Load the embedded chunks and FAISS index"""
36
- global chunks_data, docs_data, faiss_index
37
 
38
  try:
39
  # Load chunks data
@@ -44,10 +41,8 @@ def load_data():
44
  with open('mcp_docs/index/docs_md.json', 'r', encoding='utf-8') as f:
45
  docs_data = json.load(f)
46
 
47
- # Load FAISS index
48
- faiss_index = faiss.read_index('mcp_docs/index/faiss_md.index')
49
-
50
  print(f"Loaded {len(chunks_data)} chunks and {len(docs_data)} documents")
 
51
 
52
  except Exception as e:
53
  print(f"Error loading data: {e}")
@@ -70,18 +65,26 @@ async def root():
70
 
71
  @app.post("/search", response_model=SearchResponse)
72
  async def search_docs(request: SearchRequest):
73
- """Search through documentation chunks"""
74
- if not chunks_data or faiss_index is None:
75
  raise HTTPException(status_code=500, detail="Data not loaded")
76
 
77
  try:
78
- # For now, return a simple text search
79
- # In a real implementation, you'd use the FAISS index for semantic search
80
  query_lower = request.query.lower()
81
  results = []
82
 
83
  for chunk in chunks_data:
84
- if query_lower in chunk.get('text', '').lower():
 
 
 
 
 
 
 
 
 
 
85
  results.append({
86
  "chunk_id": chunk.get('chunk_id'),
87
  "title": chunk.get('title'),
@@ -89,11 +92,12 @@ async def search_docs(request: SearchRequest):
89
  "url": chunk.get('url'),
90
  "filename": chunk.get('filename'),
91
  "chunk_index": chunk.get('chunk_index'),
92
- "total_chunks": chunk.get('total_chunks')
 
93
  })
94
 
95
- # Sort by relevance (simple implementation)
96
- results = sorted(results, key=lambda x: len(x['text']), reverse=True)
97
 
98
  return SearchResponse(
99
  results=results[:request.limit],
 
4
  from fastapi import FastAPI, HTTPException
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from pydantic import BaseModel
 
 
7
 
8
  app = FastAPI(title="MCP Documentation Server", version="1.0.0")
9
 
 
19
  # Global variables for loaded data
20
  chunks_data = None
21
  docs_data = None
 
22
 
23
  class SearchRequest(BaseModel):
24
  query: str
 
29
  total: int
30
 
31
  def load_data():
32
+ """Load the embedded chunks data"""
33
+ global chunks_data, docs_data
34
 
35
  try:
36
  # Load chunks data
 
41
  with open('mcp_docs/index/docs_md.json', 'r', encoding='utf-8') as f:
42
  docs_data = json.load(f)
43
 
 
 
 
44
  print(f"Loaded {len(chunks_data)} chunks and {len(docs_data)} documents")
45
+ print("Using text-based search (no FAISS index required)")
46
 
47
  except Exception as e:
48
  print(f"Error loading data: {e}")
 
65
 
66
  @app.post("/search", response_model=SearchResponse)
67
  async def search_docs(request: SearchRequest):
68
+ """Search through documentation chunks using text matching"""
69
+ if not chunks_data:
70
  raise HTTPException(status_code=500, detail="Data not loaded")
71
 
72
  try:
 
 
73
  query_lower = request.query.lower()
74
  results = []
75
 
76
  for chunk in chunks_data:
77
+ text = chunk.get('text', '').lower()
78
+ title = chunk.get('title', '').lower()
79
+
80
+ # Simple scoring based on query matches
81
+ score = 0
82
+ if query_lower in text:
83
+ score += text.count(query_lower) * 2 # Text matches worth more
84
+ if query_lower in title:
85
+ score += title.count(query_lower) * 5 # Title matches worth most
86
+
87
+ if score > 0:
88
  results.append({
89
  "chunk_id": chunk.get('chunk_id'),
90
  "title": chunk.get('title'),
 
92
  "url": chunk.get('url'),
93
  "filename": chunk.get('filename'),
94
  "chunk_index": chunk.get('chunk_index'),
95
+ "total_chunks": chunk.get('total_chunks'),
96
+ "score": score
97
  })
98
 
99
+ # Sort by relevance score
100
+ results = sorted(results, key=lambda x: x['score'], reverse=True)
101
 
102
  return SearchResponse(
103
  results=results[:request.limit],
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
  fastapi==0.104.1
2
  uvicorn==0.24.0
3
  pydantic==2.5.0
4
- faiss-cpu==1.7.4
5
- numpy==1.24.3
6
  python-multipart==0.0.6
 
1
  fastapi==0.104.1
2
  uvicorn==0.24.0
3
  pydantic==2.5.0
 
 
4
  python-multipart==0.0.6
test_app.py CHANGED
@@ -22,15 +22,8 @@ def test_data_loading():
22
  docs = json.load(f)
23
  print(f"[OK] Loaded {len(docs)} documents")
24
 
25
- # Test FAISS index (if available)
26
- try:
27
- import faiss
28
- index = faiss.read_index('mcp_docs/index/faiss_md.index')
29
- print(f"[OK] Loaded FAISS index with {index.ntotal} vectors")
30
- except ImportError:
31
- print("[WARN] FAISS not available (will be installed in Docker)")
32
- except Exception as e:
33
- print(f"[WARN] FAISS index issue: {e}")
34
 
35
  print("\n[SUCCESS] All data files loaded successfully!")
36
  return True
 
22
  docs = json.load(f)
23
  print(f"[OK] Loaded {len(docs)} documents")
24
 
25
+ # Note: FAISS index is no longer required for text-based search
26
+ print("[INFO] Using text-based search (no FAISS index required)")
 
 
 
 
 
 
 
27
 
28
  print("\n[SUCCESS] All data files loaded successfully!")
29
  return True