File size: 4,991 Bytes
db7c1e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
from qdrant_client import QdrantClient
from qdrant_client.http import models
from typing import List, Dict, Any
import logging

logger = logging.getLogger(__name__)

class VectorDBService:
    def __init__(self, host: str = "localhost", port: int = 6333, cloud_client=None):
        self.collection_name = "textbook_content"
        self.is_available = False  # Initialize as False by default

        if cloud_client:
            # Use provided cloud client
            self.client = cloud_client
            try:
                self._init_collection()
                self.is_available = True
            except Exception as e:
                import logging
                logger = logging.getLogger(__name__)
                logger.warning(f"Vector database not available: {str(e)}. Running in fallback mode.")
                self.client = None
        else:
            # Use local client
            try:
                self.client = QdrantClient(host=host, port=port)
                self._init_collection()
                self.is_available = True
            except Exception as e:
                import logging
                logger = logging.getLogger(__name__)
                logger.warning(f"Vector database not available: {str(e)}. Running in fallback mode.")
                self.client = None

    def _init_collection(self):
        """Initialize the Qdrant collection for textbook content"""
        if not self.is_available:
            return

        try:
            # Check if collection exists
            self.client.get_collection(self.collection_name)
        except:
            # Create collection if it doesn't exist
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),  # Assuming OpenAI embeddings
            )
            logger.info(f"Created collection: {self.collection_name}")

    def add_content(self, content_id: str, content: str, embeddings: List[float], metadata: Dict[str, Any] = None):
        """Add textbook content to the vector database"""
        if not self.is_available:
            logger.warning("Vector database not available, skipping content addition")
            return

        if metadata is None:
            metadata = {}

        self.client.upsert(
            collection_name=self.collection_name,
            points=[
                models.PointStruct(
                    id=content_id,
                    vector=embeddings,
                    payload={
                        "content": content,
                        "metadata": metadata
                    }
                )
            ]
        )
        logger.info(f"Added content to vector DB: {content_id}")

    def search_content(self, query_embeddings: List[float], limit: int = 10) -> List[Dict[str, Any]]:
        """Search for relevant content based on query embeddings"""
        if not self.is_available or self.client is None:
            logger.warning("Vector database not available, returning empty results")
            # Return empty list when database is not available
            return []

        try:
            # Handle different Qdrant client versions
            if hasattr(self.client, 'search'):
                # Newer version of Qdrant client
                results = self.client.search(
                    collection_name=self.collection_name,
                    query_vector=query_embeddings,
                    limit=limit
                )
            else:
                # Older version or different interface
                results = self.client.search(
                    collection_name=self.collection_name,
                    query_vector=query_embeddings,
                    limit=limit
                )
        except AttributeError:
            logger.warning("Qdrant client search method not available, using direct processing")
            return []
        except Exception as e:
            logger.warning(f"Vector database search failed, using direct processing: {str(e)}")
            # Return empty list when search fails
            return []

        return [
            {
                "id": result.id,
                "content": result.payload.get("content"),
                "metadata": result.payload.get("metadata", {}),
                "score": getattr(result, 'score', 0.0)  # Handle different result structures
            }
            for result in results
        ]

    def delete_content(self, content_id: str):
        """Delete content from the vector database"""
        if not self.is_available:
            logger.warning("Vector database not available, skipping content deletion")
            return

        self.client.delete(
            collection_name=self.collection_name,
            points_selector=models.PointIdsList(points=[content_id])
        )
        logger.info(f"Deleted content from vector DB: {content_id}")