Spaces:

Penality
/

pdf-something

Build error

Penality committed on Feb 23, 2025

Commit

fd0dd62

verified ·

1 Parent(s): 9aee54a

Update app.py

Updated to handle the case where no document is stored in the embeddings index.

Files changed (1) hide show

app.py CHANGED Viewed

@@ -36,13 +36,15 @@ def store_document(text):
 def retrieve_document(query):
     print(f"retrieving doc based on: \n{query}")
-    query_embedding = embedding_model.encode([query])
-    _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), 1)
-    print(f"retrieved: \n{documents[closest_idx[0][0]]}")
-    return documents[closest_idx[0][0]]
 def clean_text(text):
@@ -81,12 +83,9 @@ def chatbot(pdf_file, user_question):
         if not text:
             return "Could not extract any text from the PDF."
-    try:
-        # retrieve the document relevant to the query
-        doc = retrieve_document(user_question)
-    except Exception as e:
-        return f"Error retrieving document relevant to the query: {user_question} \n{e}"
     if doc:
         print("found doc")
         # Split into smaller chunks

 def retrieve_document(query):
     print(f"retrieving doc based on: \n{query}")
+    if len(documents) >= 1:
+        query_embedding = embedding_model.encode([query])
+        _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), 1)
+        print(f"retrieved: \n{documents[closest_idx[0][0]]}")
+        return documents[closest_idx[0][0]]
+    return None
 def clean_text(text):
         if not text:
             return "Could not extract any text from the PDF."
+    # retrieve the document relevant to the query
+    doc = retrieve_document(user_question)
     if doc:
         print("found doc")
         # Split into smaller chunks