Update app/data_indexing.py
Browse files
- app/data_indexing.py +3 -11
app/data_indexing.py
CHANGED
|
@@ -58,22 +58,13 @@ class DataIndexer:
|
|
| 58 |
return vectorstore
|
| 59 |
|
| 60 |
def index_data(self, docs, batch_size=32):
|
| 61 |
-
|
| 62 |
-
with open(self.source_file, 'a') as file:
|
| 63 |
-
for doc in docs:
|
| 64 |
-
file.writelines(doc.metadata['source'] + '\n')
|
| 65 |
-
|
| 66 |
for i in range(0, len(docs), batch_size):
|
| 67 |
batch = docs[i: i + batch_size]
|
| 68 |
values = self.embedding_client.embed_documents([
|
| 69 |
doc.page_content for doc in batch
|
| 70 |
])
|
| 71 |
|
| 72 |
-
# values = self.embedding_client.feature_extraction([
|
| 73 |
-
# doc.page_content for doc in batch
|
| 74 |
-
# ])
|
| 75 |
vector_ids = [str(uuid.uuid4()) for _ in batch]
|
| 76 |
-
|
| 77 |
metadatas = [{
|
| 78 |
'text': doc.page_content,
|
| 79 |
**doc.metadata
|
|
@@ -83,7 +74,8 @@ class DataIndexer:
|
|
| 83 |
'id': vector_id,
|
| 84 |
'values': value,
|
| 85 |
'metadata': metadata
|
| 86 |
-
} for vector_id, value, metadata in zip(vector_ids, values, metadatas
|
|
|
|
| 87 |
|
| 88 |
try:
|
| 89 |
upsert_response = self.index.upsert(vectors=vectors)
|
|
@@ -91,7 +83,7 @@ class DataIndexer:
|
|
| 91 |
except Exception as e:
|
| 92 |
print(e)
|
| 93 |
|
| 94 |
-
def search(text_query, top_k=5):
|
| 95 |
vector = self.embedding_client.embed_query(text_query)
|
| 96 |
result = self.index.query(
|
| 97 |
vector=vector,
|
|
|
|
| 58 |
return vectorstore
|
| 59 |
|
| 60 |
def index_data(self, docs, batch_size=32):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
for i in range(0, len(docs), batch_size):
|
| 62 |
batch = docs[i: i + batch_size]
|
| 63 |
values = self.embedding_client.embed_documents([
|
| 64 |
doc.page_content for doc in batch
|
| 65 |
])
|
| 66 |
|
|
|
|
|
|
|
|
|
|
| 67 |
vector_ids = [str(uuid.uuid4()) for _ in batch]
|
|
|
|
| 68 |
metadatas = [{
|
| 69 |
'text': doc.page_content,
|
| 70 |
**doc.metadata
|
|
|
|
| 74 |
'id': vector_id,
|
| 75 |
'values': value,
|
| 76 |
'metadata': metadata
|
| 77 |
+
} for vector_id, value, metadata in zip(vector_ids, values, metadatas
|
| 78 |
+
)]
|
| 79 |
|
| 80 |
try:
|
| 81 |
upsert_response = self.index.upsert(vectors=vectors)
|
|
|
|
| 83 |
except Exception as e:
|
| 84 |
print(e)
|
| 85 |
|
| 86 |
+
def search(self,text_query, top_k=5):
|
| 87 |
vector = self.embedding_client.embed_query(text_query)
|
| 88 |
result = self.index.query(
|
| 89 |
vector=vector,
|