Spaces:
Runtime error
Runtime error
Huy committed on
Commit ·
aeb0b1f
1
Parent(s): 7c8ec43
Change ID generation strategy
Browse files
- .gitignore +1 -0
- llamaindex_utils.py +0 -1
- rag_pipeline.py +6 -4
.gitignore
CHANGED
|
@@ -2,3 +2,4 @@ __pycache__/
|
|
| 2 |
.ipynb_checkpoints/
|
| 3 |
env/
|
| 4 |
.DS_Store
|
|
|
|
|
|
| 2 |
.ipynb_checkpoints/
|
| 3 |
env/
|
| 4 |
.DS_Store
|
| 5 |
+
pretrained/
|
llamaindex_utils.py
CHANGED
|
@@ -199,7 +199,6 @@ class ColPaliRetriever(BaseRetriever):
|
|
| 199 |
responses = await self._vector_store_client.query_points(collection_name=self._target_collection,
|
| 200 |
query=query_embedding,
|
| 201 |
limit=self._similarity_top_k)
|
| 202 |
-
|
| 203 |
responses = responses.points
|
| 204 |
# Parse to structured output nodes
|
| 205 |
query_result = parse_to_query_result(responses)
|
|
|
|
| 199 |
responses = await self._vector_store_client.query_points(collection_name=self._target_collection,
|
| 200 |
query=query_embedding,
|
| 201 |
limit=self._similarity_top_k)
|
|
|
|
| 202 |
responses = responses.points
|
| 203 |
# Parse to structured output nodes
|
| 204 |
query_result = parse_to_query_result(responses)
|
rag_pipeline.py
CHANGED
|
@@ -207,7 +207,8 @@ def indexDocument(file_path: str,
|
|
| 207 |
payload = {}
|
| 208 |
node_metadata = {"file_name": file_path,
|
| 209 |
"page_id": i + 1}
|
| 210 |
-
|
|
|
|
| 211 |
'image': image_str,
|
| 212 |
"metadata": node_metadata}
|
| 213 |
|
|
@@ -221,7 +222,7 @@ def indexDocument(file_path: str,
|
|
| 221 |
payload["ref_doc_id"] = "None" # for Weaviate
|
| 222 |
|
| 223 |
points.append(rest.PointStruct(
|
| 224 |
-
id=node_content[
|
| 225 |
vector=multivector,
|
| 226 |
payload=payload,
|
| 227 |
))
|
|
@@ -300,7 +301,8 @@ async def async_indexDocument(file_path: str,
|
|
| 300 |
payload = {}
|
| 301 |
node_metadata = {"file_name": file_path,
|
| 302 |
"page_id": i + 1}
|
| 303 |
-
|
|
|
|
| 304 |
'image': image_str,
|
| 305 |
"metadata": node_metadata}
|
| 306 |
|
|
@@ -314,7 +316,7 @@ async def async_indexDocument(file_path: str,
|
|
| 314 |
payload["ref_doc_id"] = "None" # for Weaviate
|
| 315 |
|
| 316 |
points.append(rest.PointStruct(
|
| 317 |
-
id=node_content[
|
| 318 |
vector=multivector,
|
| 319 |
payload=payload,
|
| 320 |
))
|
|
|
|
| 207 |
payload = {}
|
| 208 |
node_metadata = {"file_name": file_path,
|
| 209 |
"page_id": i + 1}
|
| 210 |
+
|
| 211 |
+
node_content = {'id_': str(uuid.uuid5(uuid.NAMESPACE_OID, name=(file_path + str(i + 1)))),
|
| 212 |
'image': image_str,
|
| 213 |
"metadata": node_metadata}
|
| 214 |
|
|
|
|
| 222 |
payload["ref_doc_id"] = "None" # for Weaviate
|
| 223 |
|
| 224 |
points.append(rest.PointStruct(
|
| 225 |
+
id=node_content["id_"],
|
| 226 |
vector=multivector,
|
| 227 |
payload=payload,
|
| 228 |
))
|
|
|
|
| 301 |
payload = {}
|
| 302 |
node_metadata = {"file_name": file_path,
|
| 303 |
"page_id": i + 1}
|
| 304 |
+
|
| 305 |
+
node_content = {'id_': str(uuid.uuid5(uuid.NAMESPACE_OID, name=(file_path + str(i + 1)))),
|
| 306 |
'image': image_str,
|
| 307 |
"metadata": node_metadata}
|
| 308 |
|
|
|
|
| 316 |
payload["ref_doc_id"] = "None" # for Weaviate
|
| 317 |
|
| 318 |
points.append(rest.PointStruct(
|
| 319 |
+
id=node_content["id_"],
|
| 320 |
vector=multivector,
|
| 321 |
payload=payload,
|
| 322 |
))
|