Baktabek commited on
Commit
b9767b5
·
verified ·
1 Parent(s): 011512e

Delete old qdrant_retriever.py

Browse files
app/infrastructure/external/qdrant_retriever.py DELETED
@@ -1,133 +0,0 @@
1
- """
2
- Infrastructure - Qdrant Vector Store
3
- """
4
- from typing import Dict, List, Optional
5
- from uuid import UUID
6
-
7
- from qdrant_client import AsyncQdrantClient
8
- from qdrant_client.models import Distance, PointStruct, VectorParams, Filter, FieldCondition, MatchValue
9
-
10
- from app.domain.interfaces import IRetriever, RetrievalResult
11
-
12
-
13
- class QdrantRetriever(IRetriever):
14
- """Qdrant vector store implementation"""
15
-
16
- def __init__(
17
- self,
18
- url: str,
19
- collection_name: str,
20
- vector_size: int = 384,
21
- api_key: Optional[str] = None,
22
- ):
23
- self.url = url
24
- self.collection_name = collection_name
25
- self.vector_size = vector_size
26
-
27
- # Handle in-memory mode
28
- if url.startswith("memory://"):
29
- self.client = AsyncQdrantClient(location=":memory:")
30
- else:
31
- self.client = AsyncQdrantClient(url=url, api_key=api_key)
32
-
33
- async def initialize_collection(self) -> None:
34
- """Initialize Qdrant collection"""
35
- collections = await self.client.get_collections()
36
- collection_names = [c.name for c in collections.collections]
37
-
38
- if self.collection_name not in collection_names:
39
- await self.client.create_collection(
40
- collection_name=self.collection_name,
41
- vectors_config=VectorParams(size=self.vector_size, distance=Distance.COSINE),
42
- )
43
-
44
- async def search(
45
- self,
46
- query: str,
47
- top_k: int = 10,
48
- filters: Optional[dict] = None,
49
- min_score: float = 0.0,
50
- ) -> List[RetrievalResult]:
51
- """Search for relevant documents"""
52
- # Note: This requires embedding the query first
53
- # In practice, this would be called with query_vector
54
- raise NotImplementedError("Use search_by_vector instead")
55
-
56
- async def search_by_vector(
57
- self,
58
- query_vector: List[float],
59
- top_k: int = 10,
60
- filters: Optional[dict] = None,
61
- min_score: float = 0.0,
62
- ) -> List[RetrievalResult]:
63
- """Search by pre-computed vector"""
64
-
65
- # Build filter
66
- qdrant_filter = None
67
- if filters:
68
- conditions = []
69
- for key, value in filters.items():
70
- conditions.append(
71
- FieldCondition(key=key, match=MatchValue(value=value))
72
- )
73
- if conditions:
74
- qdrant_filter = Filter(must=conditions)
75
-
76
- # Search
77
- search_result = await self.client.search(
78
- collection_name=self.collection_name,
79
- query_vector=query_vector,
80
- limit=top_k,
81
- query_filter=qdrant_filter,
82
- score_threshold=min_score,
83
- )
84
-
85
- # Convert to RetrievalResult
86
- results = []
87
- for hit in search_result:
88
- payload = hit.payload or {}
89
- results.append(
90
- RetrievalResult(
91
- content=payload.get("content", ""),
92
- score=hit.score,
93
- document_id=payload.get("document_id", ""),
94
- chunk_index=payload.get("chunk_index", 0),
95
- metadata=payload.get("metadata", {}),
96
- )
97
- )
98
-
99
- return results
100
-
101
- async def hybrid_search(
102
- self,
103
- query: str,
104
- top_k: int = 10,
105
- alpha: float = 0.5,
106
- filters: Optional[dict] = None,
107
- ) -> List[RetrievalResult]:
108
- """Hybrid search (dense + sparse)"""
109
- # TODO: Implement hybrid search with BM25
110
- raise NotImplementedError("Hybrid search not yet implemented")
111
-
112
- async def add_documents(
113
- self,
114
- vectors: List[List[float]],
115
- documents: List[Dict],
116
- ids: Optional[List[str]] = None,
117
- ) -> None:
118
- """Add documents to collection"""
119
- points = []
120
- for i, (vector, doc) in enumerate(zip(vectors, documents)):
121
- point_id = ids[i] if ids else str(UUID(int=i))
122
- points.append(
123
- PointStruct(
124
- id=point_id,
125
- vector=vector,
126
- payload=doc,
127
- )
128
- )
129
-
130
- await self.client.upsert(
131
- collection_name=self.collection_name,
132
- points=points,
133
- )