Spaces:
Sleeping
Sleeping
jeevan
committed on
Commit
·
4c501f4
1
Parent(s):
637aeec
working local version
Browse files
- RagPipeline.py +1 -1
- aimakerspace/vectordatabase.py +45 -12
RagPipeline.py
CHANGED
|
@@ -23,7 +23,7 @@ class RetrievalAugmentedQAPipeline:
|
|
| 23 |
context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
|
| 24 |
|
| 25 |
context_prompt = ""
|
| 26 |
-
for context in context_list
|
| 27 |
context_prompt += context[0] + "\n"
|
| 28 |
|
| 29 |
formatted_system_prompt = self.system_role_prompt.create_message()
|
|
|
|
| 23 |
context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
|
| 24 |
|
| 25 |
context_prompt = ""
|
| 26 |
+
for context in context_list:
|
| 27 |
context_prompt += context[0] + "\n"
|
| 28 |
|
| 29 |
formatted_system_prompt = self.system_role_prompt.create_message()
|
aimakerspace/vectordatabase.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing import List, Tuple, Callable
|
|
| 6 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
| 7 |
import asyncio
|
| 8 |
from qdrant_client import models, QdrantClient
|
| 9 |
-
from qdrant_client.models import PointStruct,VectorParams,Distance
|
| 10 |
|
| 11 |
collection_name = "embedding_collection"
|
| 12 |
|
|
@@ -76,20 +76,20 @@ class VectorDatabase:
|
|
| 76 |
self.qdrant_client = QdrantClient(":memory:")
|
| 77 |
vector_params = VectorParams(
|
| 78 |
size=embedding_model.dimensions, # vector size
|
| 79 |
-
distance=
|
| 80 |
)
|
| 81 |
-
self.qdrant_client.
|
| 82 |
collection_name=collection_name,
|
| 83 |
-
vectors_config={"
|
| 84 |
)
|
| 85 |
|
| 86 |
-
def insert(self, key: str,
|
| 87 |
idx = str(uuid.uuid4())
|
| 88 |
payload = {"text": key}
|
| 89 |
|
| 90 |
point = PointStruct(
|
| 91 |
id=idx,
|
| 92 |
-
vector={"default":
|
| 93 |
payload=payload
|
| 94 |
)
|
| 95 |
# Insert the vector into Qdrant with the associated document
|
|
@@ -97,9 +97,25 @@ class VectorDatabase:
|
|
| 97 |
collection_name=collection_name,
|
| 98 |
points=[point]
|
| 99 |
)
|
| 100 |
-
print(f"Inserted vector with ID {idx}: {vector}")
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
def search(
|
| 104 |
self,
|
| 105 |
query_vector: np.array,
|
|
@@ -114,7 +130,7 @@ class VectorDatabase:
|
|
| 114 |
|
| 115 |
search_results = self.qdrant_client.search(
|
| 116 |
collection_name=collection_name,
|
| 117 |
-
query_vector=query_vector,
|
| 118 |
limit=k
|
| 119 |
)
|
| 120 |
return [(result.payload['text'], result.score) for result in search_results]
|
|
@@ -136,8 +152,25 @@ class VectorDatabase:
|
|
| 136 |
|
| 137 |
async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
|
| 138 |
embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
return self
|
| 142 |
|
| 143 |
|
|
|
|
| 6 |
from aimakerspace.openai_utils.embedding import EmbeddingModel
|
| 7 |
import asyncio
|
| 8 |
from qdrant_client import models, QdrantClient
|
| 9 |
+
from qdrant_client.models import PointStruct,VectorParams,Distance,Batch,VectorStruct,Payload
|
| 10 |
|
| 11 |
collection_name = "embedding_collection"
|
| 12 |
|
|
|
|
| 76 |
self.qdrant_client = QdrantClient(":memory:")
|
| 77 |
vector_params = VectorParams(
|
| 78 |
size=embedding_model.dimensions, # vector size
|
| 79 |
+
distance=Distance.COSINE
|
| 80 |
)
|
| 81 |
+
self.qdrant_client.create_collection(
|
| 82 |
collection_name=collection_name,
|
| 83 |
+
vectors_config={"text": vector_params},
|
| 84 |
)
|
| 85 |
|
| 86 |
+
def insert(self, key: str, vectors: np.array) -> None:
|
| 87 |
idx = str(uuid.uuid4())
|
| 88 |
payload = {"text": key}
|
| 89 |
|
| 90 |
point = PointStruct(
|
| 91 |
id=idx,
|
| 92 |
+
vector={"default": vectors.tolist()},
|
| 93 |
payload=payload
|
| 94 |
)
|
| 95 |
# Insert the vector into Qdrant with the associated document
|
|
|
|
| 97 |
collection_name=collection_name,
|
| 98 |
points=[point]
|
| 99 |
)
|
| 100 |
+
# print(f"Inserted vector with ID {idx}: {vector}")
|
| 101 |
+
# self.qdrant_client.upsert(
|
| 102 |
+
# collection_name=collection_name,
|
| 103 |
+
# points= [
|
| 104 |
+
# [PointStruct(
|
| 105 |
+
# id=idx,
|
| 106 |
+
# vector=vector,
|
| 107 |
+
# payload={"text": key}
|
| 108 |
+
# )]
|
| 109 |
+
# for idx, vector in enumerate(vectors)
|
| 110 |
+
# ])
|
| 111 |
+
# self.qdrant_client.add(
|
| 112 |
+
# collection_name=collection_name,
|
| 113 |
+
# documents=[key],
|
| 114 |
+
# metadata=[],
|
| 115 |
+
# ids=str(uuid.uuid4())
|
| 116 |
+
# )
|
| 117 |
+
|
| 118 |
+
|
| 119 |
def search(
|
| 120 |
self,
|
| 121 |
query_vector: np.array,
|
|
|
|
| 130 |
|
| 131 |
search_results = self.qdrant_client.search(
|
| 132 |
collection_name=collection_name,
|
| 133 |
+
query_vector=('text',query_vector),
|
| 134 |
limit=k
|
| 135 |
)
|
| 136 |
return [(result.payload['text'], result.score) for result in search_results]
|
|
|
|
| 152 |
|
| 153 |
async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
|
| 154 |
embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
|
| 155 |
+
# vs = VectorStruct()
|
| 156 |
+
# VectorStruct = Union[
|
| 157 |
+
# List[StrictFloat],
|
| 158 |
+
# List[List[StrictFloat]],
|
| 159 |
+
# Dict[StrictStr, Vector],
|
| 160 |
+
points = [
|
| 161 |
+
models.PointStruct(
|
| 162 |
+
id=str(uuid.uuid4()),
|
| 163 |
+
vector={ 'text': embedding},
|
| 164 |
+
payload={
|
| 165 |
+
"text": text
|
| 166 |
+
}
|
| 167 |
+
)
|
| 168 |
+
for text, embedding in zip(list_of_text, embeddings)
|
| 169 |
+
]
|
| 170 |
+
self.qdrant_client.upsert(
|
| 171 |
+
collection_name=collection_name,
|
| 172 |
+
points=points
|
| 173 |
+
)
|
| 174 |
return self
|
| 175 |
|
| 176 |
|