wang16888 committed on
Commit
f79c825
·
verified ·
1 Parent(s): f2283f1

Update app/data_indexing.py

Browse files
Files changed (1) hide show
  1. app/data_indexing.py +3 -11
app/data_indexing.py CHANGED
@@ -58,22 +58,13 @@ class DataIndexer:
58
  return vectorstore
59
 
60
  def index_data(self, docs, batch_size=32):
61
-
62
- with open(self.source_file, 'a') as file:
63
- for doc in docs:
64
- file.writelines(doc.metadata['source'] + '\n')
65
-
66
  for i in range(0, len(docs), batch_size):
67
  batch = docs[i: i + batch_size]
68
  values = self.embedding_client.embed_documents([
69
  doc.page_content for doc in batch
70
  ])
71
 
72
- # values = self.embedding_client.feature_extraction([
73
- # doc.page_content for doc in batch
74
- # ])
75
  vector_ids = [str(uuid.uuid4()) for _ in batch]
76
-
77
  metadatas = [{
78
  'text': doc.page_content,
79
  **doc.metadata
@@ -83,7 +74,8 @@ class DataIndexer:
83
  'id': vector_id,
84
  'values': value,
85
  'metadata': metadata
86
- } for vector_id, value, metadata in zip(vector_ids, values, metadatas)]
 
87
 
88
  try:
89
  upsert_response = self.index.upsert(vectors=vectors)
@@ -91,7 +83,7 @@ class DataIndexer:
91
  except Exception as e:
92
  print(e)
93
 
94
- def search(text_query, top_k=5):
95
  vector = self.embedding_client.embed_query(text_query)
96
  result = self.index.query(
97
  vector=vector,
 
58
  return vectorstore
59
 
60
  def index_data(self, docs, batch_size=32):
 
 
 
 
 
61
  for i in range(0, len(docs), batch_size):
62
  batch = docs[i: i + batch_size]
63
  values = self.embedding_client.embed_documents([
64
  doc.page_content for doc in batch
65
  ])
66
 
 
 
 
67
  vector_ids = [str(uuid.uuid4()) for _ in batch]
 
68
  metadatas = [{
69
  'text': doc.page_content,
70
  **doc.metadata
 
74
  'id': vector_id,
75
  'values': value,
76
  'metadata': metadata
77
+ } for vector_id, value, metadata in zip(vector_ids, values, metadatas
78
+ )]
79
 
80
  try:
81
  upsert_response = self.index.upsert(vectors=vectors)
 
83
  except Exception as e:
84
  print(e)
85
 
86
+ def search(self,text_query, top_k=5):
87
  vector = self.embedding_client.embed_query(text_query)
88
  result = self.index.query(
89
  vector=vector,