Update app.py
Browse files
app.py
CHANGED
|
@@ -188,22 +188,37 @@ def load_vector_store(embeddings):
|
|
| 188 |
if len(matches) > 100:
|
| 189 |
logger.info(f" Found {len(matches)} potential document fragments")
|
| 190 |
|
| 191 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
new_docstore_dict = {}
|
| 193 |
index_to_docstore_id = {}
|
| 194 |
|
| 195 |
-
|
|
|
|
| 196 |
try:
|
| 197 |
-
content
|
| 198 |
-
if len(
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
continue
|
| 208 |
|
| 209 |
logger.info(f" ✅ Reconstructed {len(new_docstore_dict)} documents from raw data")
|
|
|
|
| 188 |
if len(matches) > 100:
|
| 189 |
logger.info(f" Found {len(matches)} potential document fragments")
|
| 190 |
|
| 191 |
+
# Get total vectors in index
|
| 192 |
+
num_vectors = index.ntotal
|
| 193 |
+
logger.info(f" FAISS index has {num_vectors} vectors")
|
| 194 |
+
|
| 195 |
+
# Create one document per vector, capped at the number of extracted matches
|
| 196 |
new_docstore_dict = {}
|
| 197 |
index_to_docstore_id = {}
|
| 198 |
|
| 199 |
+
# Iterate up to the smaller of the vector count and the match count, so idx is always < len(matches)
|
| 200 |
+
for idx in range(min(num_vectors, len(matches))):
|
| 201 |
try:
|
| 202 |
+
# Get content from matches
|
| 203 |
+
if idx < len(matches):
|
| 204 |
+
content = matches[idx].decode('utf-8', errors='ignore').strip()
|
| 205 |
+
else:
|
| 206 |
+
content = f"Fashion document {idx}"
|
| 207 |
+
|
| 208 |
+
if len(content) < 50:
|
| 209 |
+
content = f"Fashion advice and style guide entry {idx}"
|
| 210 |
+
|
| 211 |
+
# Create document with string ID
|
| 212 |
+
doc_id = str(idx)
|
| 213 |
+
new_doc = Document(
|
| 214 |
+
page_content=content,
|
| 215 |
+
metadata={"source": "reconstructed"}
|
| 216 |
+
)
|
| 217 |
+
new_docstore_dict[doc_id] = new_doc
|
| 218 |
+
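# CRITICAL: Use string keys for index_to_docstore_id (NOTE(review): LangChain's FAISS wrapper appears to look this mapping up by integer position — confirm string keys are intended)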
|
| 219 |
+
index_to_docstore_id[str(idx)] = doc_id
|
| 220 |
+
except Exception as e:
|
| 221 |
+
logger.warning(f" Error creating doc {idx}: {e}")
|
| 222 |
continue
|
| 223 |
|
| 224 |
logger.info(f" ✅ Reconstructed {len(new_docstore_dict)} documents from raw data")
|