Spaces:
Build error
Build error
Update app.py
Browse files
updated to handle no document in embeddings
app.py
CHANGED
|
@@ -36,13 +36,15 @@ def store_document(text):
|
|
| 36 |
|
| 37 |
def retrieve_document(query):
|
| 38 |
print(f"retrieving doc based on: \n{query}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
print(f"retrieved: \n{documents[closest_idx[0][0]]}")
|
| 44 |
-
|
| 45 |
-
return documents[closest_idx[0][0]]
|
| 46 |
|
| 47 |
|
| 48 |
def clean_text(text):
|
|
@@ -81,12 +83,9 @@ def chatbot(pdf_file, user_question):
|
|
| 81 |
if not text:
|
| 82 |
return "Could not extract any text from the PDF."
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
except Exception as e:
|
| 88 |
-
return f"Error retrieving document relevant to the query: {user_question} \n{e}"
|
| 89 |
-
|
| 90 |
if doc:
|
| 91 |
print("found doc")
|
| 92 |
# Split into smaller chunks
|
|
|
|
| 36 |
|
| 37 |
def retrieve_document(query):
|
| 38 |
print(f"retrieving doc based on: \n{query}")
|
| 39 |
+
|
| 40 |
+
if len(documents) >= 1:
|
| 41 |
+
query_embedding = embedding_model.encode([query])
|
| 42 |
+
_, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), 1)
|
| 43 |
+
|
| 44 |
+
print(f"retrieved: \n{documents[closest_idx[0][0]]}")
|
| 45 |
|
| 46 |
+
return documents[closest_idx[0][0]]
|
| 47 |
+
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
def clean_text(text):
|
|
|
|
| 83 |
if not text:
|
| 84 |
return "Could not extract any text from the PDF."
|
| 85 |
|
| 86 |
+
# retrieve the document relevant to the query
|
| 87 |
+
doc = retrieve_document(user_question)
|
| 88 |
+
|
|
|
|
|
|
|
|
|
|
| 89 |
if doc:
|
| 90 |
print("found doc")
|
| 91 |
# Split into smaller chunks
|