Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,9 +39,26 @@ def extract_text_from_pdf(pdf_stream):
|
|
| 39 |
def create_vector_db(text_chunks):
|
| 40 |
"""Embeds text chunks and adds them to FAISS index"""
|
| 41 |
global documents, index
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
documents = text_chunks
|
| 43 |
embeddings = embed_model.encode(text_chunks)
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
def search_relevant_text(query):
|
| 47 |
"""Finds the most relevant text chunk for the given query"""
|
|
|
|
| 39 |
def create_vector_db(text_chunks):
    """Embed text chunks and rebuild the global FAISS index from them.

    Any previously indexed content is discarded: the module-level
    ``documents`` and ``index`` are replaced so that they describe only
    ``text_chunks``. Vectors are added to the index in chunk order.

    Args:
        text_chunks: The text chunks to index. A single string is treated
            as one chunk. An empty or ``None`` value clears the index.

    Side effects:
        Rebinds the globals ``documents`` and ``index``. Both are swapped
        only after embedding and indexing succeed, so a failure part-way
        through leaves the previous documents/index pair untouched.
    """
    global documents, index

    # A bare string would otherwise be stored as ``documents`` and later
    # be indexed character by character, while the index holds one vector.
    if isinstance(text_chunks, str):
        text_chunks = [text_chunks]

    # Nothing to embed: reset to a fresh, empty index instead of handing
    # FAISS a zero-width array (which fails its dimension check).
    if not text_chunks:
        documents = []
        index = faiss.IndexFlatL2(vector_dim)
        print("Error: FAISS index is empty after adding embeddings.")
        return

    embeddings = embed_model.encode(text_chunks)

    # Convert embeddings to np.float32 for FAISS
    embeddings = np.array(embeddings, dtype=np.float32)

    # FAISS expects a 2D array (one row per vector); a lone embedding may
    # come back 1D, so promote it to a single-row matrix.
    if embeddings.ndim == 1:
        embeddings = embeddings.reshape(1, -1)

    # Build the replacement index locally so the globals are only touched
    # once everything has succeeded.
    new_index = faiss.IndexFlatL2(vector_dim)
    new_index.add(embeddings)

    # Check if adding was successful (optional)
    if new_index.ntotal == 0:
        print("Error: FAISS index is empty after adding embeddings.")

    documents = text_chunks
    index = new_index
|
| 62 |
|
| 63 |
def search_relevant_text(query):
|
| 64 |
"""Finds the most relevant text chunk for the given query"""
|