ai-textbook-backend / services /vector_db.py
Naveedtechlab's picture
Add full AI Native Textbook project source code
db7c1e8
import logging
from typing import Any, Dict, List, Optional

from qdrant_client import QdrantClient
from qdrant_client.http import models
logger = logging.getLogger(__name__)
class VectorDBService:
    """Wrapper around a Qdrant collection that stores textbook content.

    The service degrades gracefully: if the Qdrant server cannot be reached
    (or the collection cannot be prepared) during construction, the instance
    switches to a fallback mode in which ``is_available`` is ``False``,
    ``client`` is ``None``, writes are skipped and searches return ``[]``.
    """

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6333,
        cloud_client=None,
        *,
        collection_name: str = "textbook_content",
        vector_size: int = 1536,
    ):
        """Connect to Qdrant and make sure the collection exists.

        Args:
            host: Host of a local Qdrant server (ignored when ``cloud_client``
                is given).
            port: Port of a local Qdrant server (ignored when ``cloud_client``
                is given).
            cloud_client: An already-constructed Qdrant client to use instead
                of creating a local one.
            collection_name: Name of the collection to read from / write to.
            vector_size: Dimensionality used when the collection has to be
                created. The default of 1536 assumes OpenAI embeddings.
        """
        self.collection_name = collection_name
        self.vector_size = vector_size
        self.is_available = False  # Flipped to True only after a successful init
        self.client = None
        try:
            if cloud_client is not None:
                self.client = cloud_client
            else:
                self.client = QdrantClient(host=host, port=port)
            self._init_collection()
            self.is_available = True
        except Exception as exc:
            # Deliberate best-effort: any connection/setup failure puts the
            # service in fallback mode instead of crashing the application.
            logger.warning(
                "Vector database not available: %s. Running in fallback mode.",
                exc,
            )
            self.client = None

    def _init_collection(self):
        """Create the collection if it does not exist yet.

        Raises:
            Exception: Whatever the client raises when the collection can
                neither be fetched nor created (handled by ``__init__``).
        """
        # Guard on the client, not on ``is_available``: this method runs during
        # construction, *before* ``is_available`` is set to True, so checking
        # the flag would skip initialization every time.
        if self.client is None:
            return
        try:
            # Probe for the collection; the client raises when it is missing.
            self.client.get_collection(self.collection_name)
        except Exception:
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=models.VectorParams(
                    size=self.vector_size,
                    distance=models.Distance.COSINE,
                ),
            )
            logger.info("Created collection: %s", self.collection_name)

    def add_content(
        self,
        content_id: str,
        content: str,
        embeddings: List[float],
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Insert or update one piece of textbook content.

        Args:
            content_id: Point identifier.
                NOTE(review): Qdrant only accepts unsigned integers or UUID
                strings as point IDs; confirm callers pass such values.
            content: Raw text stored in the point payload under ``"content"``.
            embeddings: Embedding vector for ``content``.
            metadata: Optional extra payload stored under ``"metadata"``;
                defaults to an empty dict.

        Does nothing (apart from logging a warning) in fallback mode.
        """
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, skipping content addition")
            return
        payload = {
            "content": content,
            "metadata": {} if metadata is None else metadata,
        }
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                models.PointStruct(id=content_id, vector=embeddings, payload=payload)
            ],
        )
        logger.info("Added content to vector DB: %s", content_id)

    def search_content(
        self, query_embeddings: List[float], limit: int = 10
    ) -> List[Dict[str, Any]]:
        """Return the stored content most similar to ``query_embeddings``.

        Args:
            query_embeddings: Embedding vector of the query.
            limit: Maximum number of hits to return.

        Returns:
            A list of dicts with keys ``"id"``, ``"content"``, ``"metadata"``
            and ``"score"``. An empty list is returned in fallback mode, when
            the client exposes no usable search method, or when the search
            itself fails.
        """
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, returning empty results")
            return []
        try:
            if hasattr(self.client, "search"):
                results = self.client.search(
                    collection_name=self.collection_name,
                    query_vector=query_embeddings,
                    limit=limit,
                )
            elif hasattr(self.client, "query_points"):
                # Clients without ``search`` expose ``query_points``, which
                # wraps the hits in a response object.
                results = self.client.query_points(
                    collection_name=self.collection_name,
                    query=query_embeddings,
                    limit=limit,
                ).points
            else:
                logger.warning(
                    "Qdrant client search method not available, using direct processing"
                )
                return []
        except Exception as exc:
            logger.warning(
                "Vector database search failed, using direct processing: %s", exc
            )
            return []

        hits = []
        for result in results:
            # A point may carry no payload at all; treat that as empty.
            payload = result.payload or {}
            hits.append(
                {
                    "id": result.id,
                    "content": payload.get("content"),
                    "metadata": payload.get("metadata", {}),
                    # Some result structures have no score attribute.
                    "score": getattr(result, "score", 0.0),
                }
            )
        return hits

    def delete_content(self, content_id: str):
        """Delete the point identified by ``content_id``.

        Does nothing (apart from logging a warning) in fallback mode.
        """
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, skipping content deletion")
            return
        self.client.delete(
            collection_name=self.collection_name,
            points_selector=models.PointIdsList(points=[content_id]),
        )
        logger.info("Deleted content from vector DB: %s", content_id)