Spaces:
Sleeping
Sleeping
import logging
from typing import Any, Dict, List, Optional

from qdrant_client import QdrantClient
from qdrant_client.http import models

logger = logging.getLogger(__name__)
class VectorDBService:
    """Wrapper around a single Qdrant collection holding textbook content.

    The service degrades gracefully: if the collection cannot be verified or
    created during construction, ``is_available`` stays ``False``, ``client``
    is reset to ``None`` and every public method becomes a logged no-op
    (``search_content`` returns an empty list).
    """

    def __init__(self, host: str = "localhost", port: int = 6333, cloud_client=None):
        """Connect to Qdrant and make sure the target collection exists.

        Args:
            host: Hostname of a Qdrant server; only used when no
                ``cloud_client`` is supplied.
            port: Port of that server; only used when no ``cloud_client``
                is supplied.
            cloud_client: Optional pre-built client to use instead of
                creating one from ``host``/``port``.
        """
        # Name of the collection every operation targets.
        self.collection_name = "textbook_content"
        # Becomes True only after the collection was verified or created.
        self.is_available = False
        # Active client, or None while running in fallback mode.
        self.client = None

        try:
            if cloud_client:
                self.client = cloud_client
            else:
                self.client = QdrantClient(host=host, port=port)
            self._init_collection()
            self.is_available = True
        except Exception as e:
            # Deliberate best-effort: a missing vector DB must not prevent
            # the service from being constructed.
            logger.warning(f"Vector database not available: {str(e)}. Running in fallback mode.")
            self.client = None

    def _init_collection(self):
        """Ensure the Qdrant collection for textbook content exists.

        Raises:
            Exception: Whatever the client raises when the collection can be
                neither fetched nor created; ``__init__`` catches this and
                switches to fallback mode.
        """
        # Guard on the client, not on ``is_available``: this method runs
        # during construction, before the flag is set, so checking the flag
        # here would turn the method into a permanent no-op.
        if self.client is None:
            return
        try:
            # Check if collection exists
            self.client.get_collection(self.collection_name)
        except Exception:
            # The lookup failed, so try to create the collection. The catch
            # is broad because the exception type depends on the client and
            # transport in use; if the server is unreachable the create call
            # raises as well and the error propagates to the caller.
            self.client.create_collection(
                collection_name=self.collection_name,
                # Assuming OpenAI embeddings (1536 dimensions)
                vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
            )
            logger.info(f"Created collection: {self.collection_name}")

    def add_content(
        self,
        content_id: str,
        content: str,
        embeddings: List[float],
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Add (or overwrite) one piece of textbook content.

        Args:
            content_id: Point id to upsert under.
                NOTE(review): Qdrant is understood to accept only unsigned
                integers or UUID strings as point ids - confirm that callers
                pass such values.
            content: Raw text stored in the point payload.
            embeddings: Embedding vector for ``content``.
            metadata: Optional extra payload data; defaults to ``{}``.
        """
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, skipping content addition")
            return
        if metadata is None:
            metadata = {}
        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                models.PointStruct(
                    id=content_id,
                    vector=embeddings,
                    payload={
                        "content": content,
                        "metadata": metadata,
                    },
                )
            ],
        )
        logger.info(f"Added content to vector DB: {content_id}")

    def search_content(self, query_embeddings: List[float], limit: int = 10) -> List[Dict[str, Any]]:
        """Search for relevant content based on query embeddings.

        Args:
            query_embeddings: Embedding vector of the query.
            limit: Maximum number of hits to return.

        Returns:
            A list of dicts with the keys ``id``, ``content``, ``metadata``
            and ``score``. The list is empty when the database is
            unavailable or the search fails.
        """
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, returning empty results")
            return []
        try:
            # Handle different Qdrant client versions: use ``search`` when
            # the client has it, otherwise fall back to ``query_points``,
            # whose response wraps the hits in ``.points``.
            if hasattr(self.client, "search"):
                results = self.client.search(
                    collection_name=self.collection_name,
                    query_vector=query_embeddings,
                    limit=limit,
                )
            else:
                results = self.client.query_points(
                    collection_name=self.collection_name,
                    query=query_embeddings,
                    limit=limit,
                ).points
        except AttributeError:
            logger.warning("Qdrant client search method not available, using direct processing")
            return []
        except Exception as e:
            logger.warning(f"Vector database search failed, using direct processing: {str(e)}")
            return []

        hits = []
        for result in results:
            # A hit may carry no payload; treat that as an empty payload
            # instead of failing on ``None.get``.
            payload = result.payload or {}
            hits.append(
                {
                    "id": result.id,
                    "content": payload.get("content"),
                    "metadata": payload.get("metadata", {}),
                    # Handle result structures without a score attribute.
                    "score": getattr(result, "score", 0.0),
                }
            )
        return hits

    def delete_content(self, content_id: str):
        """Delete the point stored under ``content_id`` from the collection.

        Args:
            content_id: Id of the point to remove.
        """
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, skipping content deletion")
            return
        self.client.delete(
            collection_name=self.collection_name,
            points_selector=models.PointIdsList(points=[content_id]),
        )
        logger.info(f"Deleted content from vector DB: {content_id}")