Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
db.py
CHANGED
|
@@ -69,31 +69,38 @@ class SanatanDatabase:
|
|
| 69 |
all_docs = collection.get()
|
| 70 |
matched_docs = []
|
| 71 |
|
| 72 |
-
for
|
| 73 |
all_docs["documents"], all_docs["metadatas"], all_docs["ids"]
|
| 74 |
):
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
doc_match = regex.search(normalize(d))
|
| 77 |
metadata_match = False
|
| 78 |
|
| 79 |
-
current_metadata = metadata[i] if isinstance(metadata, list) else metadata
|
| 80 |
for key, value in current_metadata.items():
|
| 81 |
if isinstance(value, str) and regex.search(normalize(value)):
|
| 82 |
metadata_match = True
|
| 83 |
break
|
| 84 |
elif isinstance(value, list):
|
| 85 |
-
|
| 86 |
-
if any(
|
| 87 |
-
isinstance(v, str) and regex.search(normalize(v))
|
| 88 |
-
for v in value
|
| 89 |
-
):
|
| 90 |
metadata_match = True
|
| 91 |
break
|
| 92 |
|
| 93 |
if doc_match or metadata_match:
|
| 94 |
matched_docs.append(
|
| 95 |
{
|
| 96 |
-
"id":
|
| 97 |
"document": d,
|
| 98 |
"metadata": current_metadata,
|
| 99 |
}
|
|
@@ -103,7 +110,6 @@ class SanatanDatabase:
|
|
| 103 |
if len(matched_docs) >= n_results:
|
| 104 |
break
|
| 105 |
|
| 106 |
-
|
| 107 |
return {
|
| 108 |
"documents": [[d["document"] for d in matched_docs]],
|
| 109 |
"ids": [[d["id"] for d in matched_docs]],
|
|
|
|
| 69 |
all_docs = collection.get()
|
| 70 |
matched_docs = []
|
| 71 |
|
| 72 |
+
for doc_list, metadata_list, doc_id_list in zip(
|
| 73 |
all_docs["documents"], all_docs["metadatas"], all_docs["ids"]
|
| 74 |
):
|
| 75 |
+
# Ensure all are lists
|
| 76 |
+
if isinstance(doc_list, str):
|
| 77 |
+
doc_list = [doc_list]
|
| 78 |
+
if isinstance(metadata_list, dict):
|
| 79 |
+
metadata_list = [metadata_list]
|
| 80 |
+
if isinstance(doc_id_list, str):
|
| 81 |
+
doc_id_list = [doc_id_list]
|
| 82 |
+
|
| 83 |
+
for i in range(len(doc_list)):
|
| 84 |
+
d = doc_list[i]
|
| 85 |
+
current_metadata = metadata_list[i]
|
| 86 |
+
current_id = doc_id_list[i]
|
| 87 |
+
|
| 88 |
doc_match = regex.search(normalize(d))
|
| 89 |
metadata_match = False
|
| 90 |
|
|
|
|
| 91 |
for key, value in current_metadata.items():
|
| 92 |
if isinstance(value, str) and regex.search(normalize(value)):
|
| 93 |
metadata_match = True
|
| 94 |
break
|
| 95 |
elif isinstance(value, list):
|
| 96 |
+
if any(isinstance(v, str) and regex.search(normalize(v)) for v in value):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
metadata_match = True
|
| 98 |
break
|
| 99 |
|
| 100 |
if doc_match or metadata_match:
|
| 101 |
matched_docs.append(
|
| 102 |
{
|
| 103 |
+
"id": current_id,
|
| 104 |
"document": d,
|
| 105 |
"metadata": current_metadata,
|
| 106 |
}
|
|
|
|
| 110 |
if len(matched_docs) >= n_results:
|
| 111 |
break
|
| 112 |
|
|
|
|
| 113 |
return {
|
| 114 |
"documents": [[d["document"] for d in matched_docs]],
|
| 115 |
"ids": [[d["id"] for d in matched_docs]],
|