Huy committed on
Commit
aeb0b1f
·
1 Parent(s): 7c8ec43

Change ID generation strategy

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. llamaindex_utils.py +0 -1
  3. rag_pipeline.py +6 -4
.gitignore CHANGED
@@ -2,3 +2,4 @@ __pycache__/
2
  .ipynb_checkpoints/
3
  env/
4
  .DS_Store
 
 
2
  .ipynb_checkpoints/
3
  env/
4
  .DS_Store
5
+ pretrained/
llamaindex_utils.py CHANGED
@@ -199,7 +199,6 @@ class ColPaliRetriever(BaseRetriever):
199
  responses = await self._vector_store_client.query_points(collection_name=self._target_collection,
200
  query=query_embedding,
201
  limit=self._similarity_top_k)
202
-
203
  responses = responses.points
204
  # Parse to structured output nodes
205
  query_result = parse_to_query_result(responses)
 
199
  responses = await self._vector_store_client.query_points(collection_name=self._target_collection,
200
  query=query_embedding,
201
  limit=self._similarity_top_k)
 
202
  responses = responses.points
203
  # Parse to structured output nodes
204
  query_result = parse_to_query_result(responses)
rag_pipeline.py CHANGED
@@ -207,7 +207,8 @@ def indexDocument(file_path: str,
207
  payload = {}
208
  node_metadata = {"file_name": file_path,
209
  "page_id": i + 1}
210
- node_content = {'id_': abs(hash(file_path + str(i + 1))),
 
211
  'image': image_str,
212
  "metadata": node_metadata}
213
 
@@ -221,7 +222,7 @@ def indexDocument(file_path: str,
221
  payload["ref_doc_id"] = "None" # for Weaviate
222
 
223
  points.append(rest.PointStruct(
224
- id=node_content['id_'],
225
  vector=multivector,
226
  payload=payload,
227
  ))
@@ -300,7 +301,8 @@ async def async_indexDocument(file_path: str,
300
  payload = {}
301
  node_metadata = {"file_name": file_path,
302
  "page_id": i + 1}
303
- node_content = {'id_': abs(hash(file_path + str(i + 1))),
 
304
  'image': image_str,
305
  "metadata": node_metadata}
306
 
@@ -314,7 +316,7 @@ async def async_indexDocument(file_path: str,
314
  payload["ref_doc_id"] = "None" # for Weaviate
315
 
316
  points.append(rest.PointStruct(
317
- id=node_content['id_'],
318
  vector=multivector,
319
  payload=payload,
320
  ))
 
207
  payload = {}
208
  node_metadata = {"file_name": file_path,
209
  "page_id": i + 1}
210
+
211
+ node_content = {'id_': str(uuid.uuid5(uuid.NAMESPACE_OID, name=(file_path + str(i + 1)))),
212
  'image': image_str,
213
  "metadata": node_metadata}
214
 
 
222
  payload["ref_doc_id"] = "None" # for Weaviate
223
 
224
  points.append(rest.PointStruct(
225
+ id=node_content["id_"],
226
  vector=multivector,
227
  payload=payload,
228
  ))
 
301
  payload = {}
302
  node_metadata = {"file_name": file_path,
303
  "page_id": i + 1}
304
+
305
+ node_content = {'id_': str(uuid.uuid5(uuid.NAMESPACE_OID, name=(file_path + str(i + 1)))),
306
  'image': image_str,
307
  "metadata": node_metadata}
308
 
 
316
  payload["ref_doc_id"] = "None" # for Weaviate
317
 
318
  points.append(rest.PointStruct(
319
+ id=node_content["id_"],
320
  vector=multivector,
321
  payload=payload,
322
  ))