Spaces:
Build error
Build error
Update app.py
Browse files
created INDEX_FILE if non-existent
app.py
CHANGED
|
@@ -40,10 +40,15 @@ os.makedirs(DOCUMENT_DIR, exist_ok=True)
|
|
| 40 |
|
| 41 |
# Load FAISS index if it exists
|
| 42 |
if os.path.exists(INDEX_FILE):
|
|
|
|
| 43 |
index = faiss.read_index(INDEX_FILE)
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# Load metadata
|
| 46 |
if os.path.exists(METADATA_FILE):
|
|
|
|
| 47 |
with open(METADATA_FILE, "r") as f:
|
| 48 |
metadata = json.load(f)
|
| 49 |
else:
|
|
@@ -73,7 +78,9 @@ def store_document(text):
|
|
| 73 |
# Update metadata with FAISS index
|
| 74 |
metadata[str(doc_index)] = filename
|
| 75 |
with open(METADATA_FILE, "w") as f:
|
| 76 |
-
|
|
|
|
|
|
|
| 77 |
|
| 78 |
# Save FAISS index properly
|
| 79 |
faiss.write_index(index, INDEX_FILE)
|
|
@@ -88,7 +95,8 @@ def retrieve_document(query):
|
|
| 88 |
_, closest_idx = index.search(query_embedding, 1)
|
| 89 |
|
| 90 |
if not closest_idx or closest_idx[0][0] not in metadata:
|
| 91 |
-
|
|
|
|
| 92 |
|
| 93 |
|
| 94 |
if closest_idx[0][0] in metadata: # Ensure a valid match
|
|
@@ -139,7 +147,7 @@ def chatbot(pdf_file, user_question):
|
|
| 139 |
doc = retrieve_document(user_question)
|
| 140 |
|
| 141 |
if doc:
|
| 142 |
-
print("found doc")
|
| 143 |
# Split into smaller chunks
|
| 144 |
chunks = split_text(doc)
|
| 145 |
|
|
|
|
| 40 |
|
| 41 |
# Load FAISS index if it exists
|
| 42 |
if os.path.exists(INDEX_FILE):
|
| 43 |
+
print(" FAISS index file exists")
|
| 44 |
index = faiss.read_index(INDEX_FILE)
|
| 45 |
+
else:
|
| 46 |
+
print(" No FAISS index found. Creating a new one.")
|
| 47 |
+
index = faiss.IndexFlatL2(embedding_dim) # Empty FAISS index
|
| 48 |
|
| 49 |
# Load metadata
|
| 50 |
if os.path.exists(METADATA_FILE):
|
| 51 |
+
print("metadata exists")
|
| 52 |
with open(METADATA_FILE, "r") as f:
|
| 53 |
metadata = json.load(f)
|
| 54 |
else:
|
|
|
|
| 78 |
# Update metadata with FAISS index
|
| 79 |
metadata[str(doc_index)] = filename
|
| 80 |
with open(METADATA_FILE, "w") as f:
|
| 81 |
+
print(metadata)
|
| 82 |
+
json.dump(metadata, f)
|
| 83 |
+
print("saved Metadata")
|
| 84 |
|
| 85 |
# Save FAISS index properly
|
| 86 |
faiss.write_index(index, INDEX_FILE)
|
|
|
|
| 95 |
_, closest_idx = index.search(query_embedding, 1)
|
| 96 |
|
| 97 |
if not closest_idx or closest_idx[0][0] not in metadata:
|
| 98 |
+
print("No relevant Document found")
|
| 99 |
+
return None
|
| 100 |
|
| 101 |
|
| 102 |
if closest_idx[0][0] in metadata: # Ensure a valid match
|
|
|
|
| 147 |
doc = retrieve_document(user_question)
|
| 148 |
|
| 149 |
if doc:
|
| 150 |
+
print(f"found doc{doc}")
|
| 151 |
# Split into smaller chunks
|
| 152 |
chunks = split_text(doc)
|
| 153 |
|