vikramvasudevan committed on
Commit
5c1cea6
·
verified ·
1 Parent(s): 8d1a737

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. db.py +16 -10
db.py CHANGED
@@ -69,31 +69,38 @@ class SanatanDatabase:
69
  all_docs = collection.get()
70
  matched_docs = []
71
 
72
- for doc, metadata, ids in zip(
73
  all_docs["documents"], all_docs["metadatas"], all_docs["ids"]
74
  ):
75
- for i, d in enumerate(doc):
 
 
 
 
 
 
 
 
 
 
 
 
76
  doc_match = regex.search(normalize(d))
77
  metadata_match = False
78
 
79
- current_metadata = metadata[i] if isinstance(metadata, list) else metadata
80
  for key, value in current_metadata.items():
81
  if isinstance(value, str) and regex.search(normalize(value)):
82
  metadata_match = True
83
  break
84
  elif isinstance(value, list):
85
- # Check within list of strings (e.g., divya_desams)
86
- if any(
87
- isinstance(v, str) and regex.search(normalize(v))
88
- for v in value
89
- ):
90
  metadata_match = True
91
  break
92
 
93
  if doc_match or metadata_match:
94
  matched_docs.append(
95
  {
96
- "id": ids[i],
97
  "document": d,
98
  "metadata": current_metadata,
99
  }
@@ -103,7 +110,6 @@ class SanatanDatabase:
103
  if len(matched_docs) >= n_results:
104
  break
105
 
106
-
107
  return {
108
  "documents": [[d["document"] for d in matched_docs]],
109
  "ids": [[d["id"] for d in matched_docs]],
 
69
  all_docs = collection.get()
70
  matched_docs = []
71
 
72
+ for doc_list, metadata_list, doc_id_list in zip(
73
  all_docs["documents"], all_docs["metadatas"], all_docs["ids"]
74
  ):
75
+ # Ensure all are lists
76
+ if isinstance(doc_list, str):
77
+ doc_list = [doc_list]
78
+ if isinstance(metadata_list, dict):
79
+ metadata_list = [metadata_list]
80
+ if isinstance(doc_id_list, str):
81
+ doc_id_list = [doc_id_list]
82
+
83
+ for i in range(len(doc_list)):
84
+ d = doc_list[i]
85
+ current_metadata = metadata_list[i]
86
+ current_id = doc_id_list[i]
87
+
88
  doc_match = regex.search(normalize(d))
89
  metadata_match = False
90
 
 
91
  for key, value in current_metadata.items():
92
  if isinstance(value, str) and regex.search(normalize(value)):
93
  metadata_match = True
94
  break
95
  elif isinstance(value, list):
96
+ if any(isinstance(v, str) and regex.search(normalize(v)) for v in value):
 
 
 
 
97
  metadata_match = True
98
  break
99
 
100
  if doc_match or metadata_match:
101
  matched_docs.append(
102
  {
103
+ "id": current_id,
104
  "document": d,
105
  "metadata": current_metadata,
106
  }
 
110
  if len(matched_docs) >= n_results:
111
  break
112
 
 
113
  return {
114
  "documents": [[d["document"] for d in matched_docs]],
115
  "ids": [[d["id"] for d in matched_docs]],