Update app.py
Browse files
app.py
CHANGED
|
@@ -39,6 +39,37 @@ def read_root(request: Request):
|
|
| 39 |
|
| 40 |
@app.post("/embed")
|
| 41 |
def embed_strings(request: EmbedRequest):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
new_documents = request.texts
|
| 43 |
new_embeddings = model.encode(new_documents)
|
| 44 |
index.add(np.array(new_embeddings))
|
|
|
|
| 39 |
|
| 40 |
@app.post("/embed")
def embed_strings(request: EmbedRequest):
    """Embed the request's texts in batches and append them to the index.

    The texts in ``request.texts`` are encoded ``batch_size`` documents at a
    time with the module-level ``model``; the resulting vectors are added to
    the module-level ``index`` and the raw texts are appended to the
    module-level ``documents`` list.

    Each document is encoded and indexed exactly once, so ``index`` grows by
    the same number of entries as ``documents``.

    Args:
        request: Request body; only its ``texts`` attribute is read here.

    Returns:
        A dict with a single ``"message"`` key reporting how many strings
        were embedded and the index size afterwards (``index.ntotal``).
    """
    new_documents = request.texts
    batch_size = 20

    # Encode in slices of at most batch_size documents. Stepping the start
    # offset by batch_size means the final slice already carries any
    # remainder shorter than batch_size, so no separate "remaining docs"
    # pass is needed (a second pass would index those documents twice).
    new_embeddings = []
    for start in range(0, len(new_documents), batch_size):
        batch = new_documents[start:start + batch_size]
        new_embeddings.extend(model.encode(batch))
        # Report the real batch length; the last batch may be short.
        print(f"embeded {len(batch)} docs")

    # With no input, np.array([]) is 1-D rather than (n, dim).
    # NOTE(review): index.add presumably requires a 2-D array - confirm;
    # skipping the call for an empty request is safe either way.
    if new_embeddings:
        index.add(np.array(new_embeddings))

    new_size = index.ntotal
    documents.extend(new_documents)
    print(f"End embedding {len(new_documents)} docs, new DB size: {new_size}")
    return {
        "message": f"{len(new_documents)} new strings embedded and added to FAISS database. New size of the database: {new_size}"
    }
|
| 71 |
+
|
| 72 |
+
def embed_strings_v0(request: EmbedRequest):
|
| 73 |
new_documents = request.texts
|
| 74 |
new_embeddings = model.encode(new_documents)
|
| 75 |
index.add(np.array(new_embeddings))
|