Baktabek commited on
Commit
22af138
·
verified ·
1 Parent(s): e12568c

Fix: Handle memory:// URL for Qdrant in-memory mode

Browse files
app//infrastructure//external//qdrant_retriever.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Infrastructure - Qdrant Vector Store
3
+ """
4
+ from typing import Dict, List, Optional
5
+ from uuid import UUID
6
+
7
+ from qdrant_client import AsyncQdrantClient
8
+ from qdrant_client.models import Distance, PointStruct, VectorParams, Filter, FieldCondition, MatchValue
9
+
10
+ from app.domain.interfaces import IRetriever, RetrievalResult
11
+
12
+
13
+ class QdrantRetriever(IRetriever):
14
+ """Qdrant vector store implementation"""
15
+
16
+ def __init__(
17
+ self,
18
+ url: str,
19
+ collection_name: str,
20
+ vector_size: int = 384,
21
+ api_key: Optional[str] = None,
22
+ ):
23
+ self.url = url
24
+ self.collection_name = collection_name
25
+ self.vector_size = vector_size
26
+
27
+ # Handle in-memory mode
28
+ if url.startswith("memory://"):
29
+ self.client = AsyncQdrantClient(location=":memory:")
30
+ else:
31
+ self.client = AsyncQdrantClient(url=url, api_key=api_key)
32
+
33
+ async def initialize_collection(self) -> None:
34
+ """Initialize Qdrant collection"""
35
+ collections = await self.client.get_collections()
36
+ collection_names = [c.name for c in collections.collections]
37
+
38
+ if self.collection_name not in collection_names:
39
+ await self.client.create_collection(
40
+ collection_name=self.collection_name,
41
+ vectors_config=VectorParams(size=self.vector_size, distance=Distance.COSINE),
42
+ )
43
+
44
+ async def search(
45
+ self,
46
+ query: str,
47
+ top_k: int = 10,
48
+ filters: Optional[dict] = None,
49
+ min_score: float = 0.0,
50
+ ) -> List[RetrievalResult]:
51
+ """Search for relevant documents"""
52
+ # Note: This requires embedding the query first
53
+ # In practice, this would be called with query_vector
54
+ raise NotImplementedError("Use search_by_vector instead")
55
+
56
+ async def search_by_vector(
57
+ self,
58
+ query_vector: List[float],
59
+ top_k: int = 10,
60
+ filters: Optional[dict] = None,
61
+ min_score: float = 0.0,
62
+ ) -> List[RetrievalResult]:
63
+ """Search by pre-computed vector"""
64
+
65
+ # Build filter
66
+ qdrant_filter = None
67
+ if filters:
68
+ conditions = []
69
+ for key, value in filters.items():
70
+ conditions.append(
71
+ FieldCondition(key=key, match=MatchValue(value=value))
72
+ )
73
+ if conditions:
74
+ qdrant_filter = Filter(must=conditions)
75
+
76
+ # Search
77
+ search_result = await self.client.search(
78
+ collection_name=self.collection_name,
79
+ query_vector=query_vector,
80
+ limit=top_k,
81
+ query_filter=qdrant_filter,
82
+ score_threshold=min_score,
83
+ )
84
+
85
+ # Convert to RetrievalResult
86
+ results = []
87
+ for hit in search_result:
88
+ payload = hit.payload or {}
89
+ results.append(
90
+ RetrievalResult(
91
+ content=payload.get("content", ""),
92
+ score=hit.score,
93
+ document_id=payload.get("document_id", ""),
94
+ chunk_index=payload.get("chunk_index", 0),
95
+ metadata=payload.get("metadata", {}),
96
+ )
97
+ )
98
+
99
+ return results
100
+
101
+ async def hybrid_search(
102
+ self,
103
+ query: str,
104
+ top_k: int = 10,
105
+ alpha: float = 0.5,
106
+ filters: Optional[dict] = None,
107
+ ) -> List[RetrievalResult]:
108
+ """Hybrid search (semantic + keyword)"""
109
+ # Simplified - in production combine with keyword search
110
+ raise NotImplementedError("Hybrid search requires integration with keyword search")
111
+
112
+ async def upsert_points(
113
+ self,
114
+ points: List[Dict],
115
+ ) -> None:
116
+ """Upsert points to collection"""
117
+ qdrant_points = [
118
+ PointStruct(
119
+ id=point["id"],
120
+ vector=point["vector"],
121
+ payload=point["payload"],
122
+ )
123
+ for point in points
124
+ ]
125
+
126
+ await self.client.upsert(
127
+ collection_name=self.collection_name,
128
+ points=qdrant_points,
129
+ )