Spaces:
Sleeping
Sleeping
Commit ·
d0b4013
1
Parent(s): 39ce191
added mcp fallback
Browse files
- backend/routes/search.py +1 -1
- backend/services/qdrant_service.py +16 -6
- database/.gitignore +23 -0
- database/test_retrieval.py +0 -93
backend/routes/search.py
CHANGED
|
@@ -67,7 +67,7 @@ async def search_math_problems(
|
|
| 67 |
kb_results = await qdrant_service.search_similar(validated_question)
|
| 68 |
|
| 69 |
# Step 3: Determine if we need web search fallback
|
| 70 |
-
confidence_threshold = 0.5
|
| 71 |
best_score = kb_results[0].score if kb_results else 0.0
|
| 72 |
|
| 73 |
if best_score >= confidence_threshold:
|
|
|
|
| 67 |
kb_results = await qdrant_service.search_similar(validated_question)
|
| 68 |
|
| 69 |
# Step 3: Determine if we need web search fallback
|
| 70 |
+
confidence_threshold = 0.8 # Increased from 0.5 to 0.8 for higher confidence requirement
|
| 71 |
best_score = kb_results[0].score if kb_results else 0.0
|
| 72 |
|
| 73 |
if best_score >= confidence_threshold:
|
backend/services/qdrant_service.py
CHANGED
|
@@ -32,13 +32,20 @@ class QdrantService:
|
|
| 32 |
def _initialize(self):
|
| 33 |
"""Initialize Qdrant manager and embedding generator."""
|
| 34 |
try:
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
qdrant_config = {
|
| 37 |
-
'url':
|
| 38 |
-
'api_key': '
|
| 39 |
-
'collection_name': '
|
| 40 |
}
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
self.qdrant_manager = QdrantManager(
|
| 43 |
url=qdrant_config['url'],
|
| 44 |
api_key=qdrant_config['api_key']
|
|
@@ -68,12 +75,14 @@ class QdrantService:
|
|
| 68 |
return []
|
| 69 |
|
| 70 |
try:
|
|
|
|
| 71 |
# Generate embedding for the question
|
| 72 |
query_embedding = self.embedding_generator.embed_text(question)
|
| 73 |
|
| 74 |
# Search in Qdrant
|
|
|
|
| 75 |
results = self.qdrant_manager.search_similar(
|
| 76 |
-
collection_name=
|
| 77 |
query_vector=query_embedding,
|
| 78 |
limit=limit
|
| 79 |
)
|
|
@@ -132,7 +141,8 @@ class QdrantService:
|
|
| 132 |
request_data=request_data,
|
| 133 |
response_data=response_data,
|
| 134 |
response_time_ms=response_time_ms,
|
| 135 |
-
source=source
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
# TODO: Store log entry in Qdrant analytics collection
|
|
|
|
| 32 |
def _initialize(self):
|
| 33 |
"""Initialize Qdrant manager and embedding generator."""
|
| 34 |
try:
|
| 35 |
+
import os
|
| 36 |
+
from dotenv import load_dotenv
|
| 37 |
+
load_dotenv()
|
| 38 |
+
|
| 39 |
+
# Qdrant configuration from environment variables
|
| 40 |
qdrant_config = {
|
| 41 |
+
'url': os.getenv('QDRANT_URL'),
|
| 42 |
+
'api_key': os.getenv('QDRANT_API_KEY'),
|
| 43 |
+
'collection_name': os.getenv('QDRANT_COLLECTION', 'nuinamath')
|
| 44 |
}
|
| 45 |
|
| 46 |
+
if not qdrant_config['url'] or not qdrant_config['api_key']:
|
| 47 |
+
raise ValueError("QDRANT_URL and QDRANT_API_KEY must be set in environment variables")
|
| 48 |
+
|
| 49 |
self.qdrant_manager = QdrantManager(
|
| 50 |
url=qdrant_config['url'],
|
| 51 |
api_key=qdrant_config['api_key']
|
|
|
|
| 75 |
return []
|
| 76 |
|
| 77 |
try:
|
| 78 |
+
import os
|
| 79 |
# Generate embedding for the question
|
| 80 |
query_embedding = self.embedding_generator.embed_text(question)
|
| 81 |
|
| 82 |
# Search in Qdrant
|
| 83 |
+
collection_name = os.getenv('QDRANT_COLLECTION', 'nuinamath')
|
| 84 |
results = self.qdrant_manager.search_similar(
|
| 85 |
+
collection_name=collection_name,
|
| 86 |
query_vector=query_embedding,
|
| 87 |
limit=limit
|
| 88 |
)
|
|
|
|
| 141 |
request_data=request_data,
|
| 142 |
response_data=response_data,
|
| 143 |
response_time_ms=response_time_ms,
|
| 144 |
+
source=source,
|
| 145 |
+
status_code=200 # Default to 200 for successful responses
|
| 146 |
)
|
| 147 |
|
| 148 |
# TODO: Store log entry in Qdrant analytics collection
|
database/.gitignore
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python cache
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# Virtual environment
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
| 9 |
+
|
| 10 |
+
# IDE files
|
| 11 |
+
.vscode/
|
| 12 |
+
.idea/
|
| 13 |
+
|
| 14 |
+
# OS files
|
| 15 |
+
.DS_Store
|
| 16 |
+
Thumbs.db
|
| 17 |
+
|
| 18 |
+
# Logs
|
| 19 |
+
*.log
|
| 20 |
+
|
| 21 |
+
# Temporary files
|
| 22 |
+
*.tmp
|
| 23 |
+
*.temp
|
database/test_retrieval.py
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Test script for retrieving similar math problems from Qdrant.
|
| 3 |
-
"""
|
| 4 |
-
import logging
|
| 5 |
-
import os
|
| 6 |
-
from dotenv import load_dotenv
|
| 7 |
-
|
| 8 |
-
# Load environment variables
|
| 9 |
-
load_dotenv()
|
| 10 |
-
|
| 11 |
-
# Configuration settings
|
| 12 |
-
QDRANT_URL = os.getenv("QDRANT_URL")
|
| 13 |
-
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
| 14 |
-
QDRANT_COLLECTION = os.getenv("QDRANT_COLLECTION", "nuinamath")
|
| 15 |
-
EMBEDDING_MODEL = "all-MiniLM-L6-v2"
|
| 16 |
-
|
| 17 |
-
from utils import EmbeddingGenerator, format_retrieval_results
|
| 18 |
-
from qdrant_manager import QdrantManager
|
| 19 |
-
|
| 20 |
-
# Set up logging
|
| 21 |
-
logging.basicConfig(level=logging.INFO)
|
| 22 |
-
logger = logging.getLogger(__name__)
|
| 23 |
-
|
| 24 |
-
def test_retrieval():
|
| 25 |
-
"""Test the retrieval system with sample math questions."""
|
| 26 |
-
|
| 27 |
-
# Sample test questions
|
| 28 |
-
test_questions = [
|
| 29 |
-
"What is the value of x in 3x + 5 = 20?",
|
| 30 |
-
"How do you find the area of a triangle given 3 sides?",
|
| 31 |
-
"Solve for y: 2y - 7 = 15",
|
| 32 |
-
"What is the derivative of x^2 + 3x?",
|
| 33 |
-
"Find the arithmetic sequence common difference"
|
| 34 |
-
]
|
| 35 |
-
|
| 36 |
-
try:
|
| 37 |
-
# Initialize components
|
| 38 |
-
logger.info("Initializing retrieval system...")
|
| 39 |
-
embedding_generator = EmbeddingGenerator(EMBEDDING_MODEL)
|
| 40 |
-
qdrant_manager = QdrantManager(QDRANT_URL, QDRANT_API_KEY)
|
| 41 |
-
|
| 42 |
-
# Test each question
|
| 43 |
-
for i, question in enumerate(test_questions, 1):
|
| 44 |
-
print(f"\n{'='*60}")
|
| 45 |
-
print(f"TEST QUERY {i}: {question}")
|
| 46 |
-
print('='*60)
|
| 47 |
-
|
| 48 |
-
# Generate embedding for the question
|
| 49 |
-
query_embedding = embedding_generator.embed_single_text(question)
|
| 50 |
-
|
| 51 |
-
# Search for similar problems
|
| 52 |
-
results = qdrant_manager.search_similar(
|
| 53 |
-
collection_name=QDRANT_COLLECTION,
|
| 54 |
-
query_vector=query_embedding,
|
| 55 |
-
limit=3,
|
| 56 |
-
score_threshold=0.1
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
# Format and display results
|
| 60 |
-
formatted_results = format_retrieval_results(results)
|
| 61 |
-
print(formatted_results)
|
| 62 |
-
|
| 63 |
-
except Exception as e:
|
| 64 |
-
logger.error(f"Error in retrieval test: {e}")
|
| 65 |
-
|
| 66 |
-
def test_collection_status():
|
| 67 |
-
"""Check the status of the Qdrant collection."""
|
| 68 |
-
try:
|
| 69 |
-
qdrant_manager = QdrantManager(QDRANT_URL, QDRANT_API_KEY)
|
| 70 |
-
|
| 71 |
-
print(f"\n{'='*40}")
|
| 72 |
-
print("COLLECTION STATUS")
|
| 73 |
-
print('='*40)
|
| 74 |
-
|
| 75 |
-
info = qdrant_manager.get_collection_info(QDRANT_COLLECTION)
|
| 76 |
-
if info:
|
| 77 |
-
print(f"Collection Name: {QDRANT_COLLECTION}")
|
| 78 |
-
print(f"Status: {info.status}")
|
| 79 |
-
print(f"Vectors Count: {info.vectors_count}")
|
| 80 |
-
print(f"Vector Size: {info.config.params.vectors.size}")
|
| 81 |
-
print(f"Distance Metric: {info.config.params.vectors.distance}")
|
| 82 |
-
else:
|
| 83 |
-
print("Collection not found or error occurred")
|
| 84 |
-
|
| 85 |
-
except Exception as e:
|
| 86 |
-
logger.error(f"Error checking collection status: {e}")
|
| 87 |
-
|
| 88 |
-
if __name__ == "__main__":
|
| 89 |
-
print("Testing Qdrant Collection Status...")
|
| 90 |
-
test_collection_status()
|
| 91 |
-
|
| 92 |
-
print("\n\nTesting Retrieval System...")
|
| 93 |
-
test_retrieval()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|