cryogenic22 committed on
Commit
3537d63
·
verified ·
1 Parent(s): 036a620

Update utils/vector_store.py

Browse files
Files changed (1) hide show
  1. utils/vector_store.py +20 -35
utils/vector_store.py CHANGED
@@ -112,47 +112,32 @@ class VectorStore:
112
 
113
  return chunks
114
 
115
- def similarity_search(
116
- self,
117
- query: str,
118
- k: int = 5,
119
- threshold: float = 0.5,
120
- filter_criteria: Dict[str, List] = None
121
- ) -> List[Dict]:
122
- """Enhanced similarity search with filtering and re-ranking."""
123
- # Encode query
124
  query_vector = self.model.encode(query, convert_to_tensor=True)
125
-
126
- # Calculate similarities and filter results
127
  results = []
 
128
  for doc in self.vectors:
129
- # Apply filters if specified
130
- if filter_criteria:
131
- skip = False
132
- for key, values in filter_criteria.items():
133
- doc_value = self._get_nested_dict_value(doc["metadata"], key)
134
- if doc_value not in values:
135
- skip = True
136
- break
137
- if skip:
138
- continue
139
-
140
- # Calculate similarity
141
  similarity = util.pytorch_cos_sim(query_vector, doc["vector"]).item()
142
- if similarity >= threshold:
143
- results.append({
144
- **doc,
145
- "score": similarity
146
- })
147
-
148
- # Sort by similarity score
149
  results.sort(key=lambda x: x["score"], reverse=True)
150
-
151
- # Re-rank results based on chunk position and metadata
152
- reranked_results = self._rerank_results(results[:k*2], query)
153
-
154
- return reranked_results[:k]
155
 
 
 
 
 
156
  def _rerank_results(self, results: List[Dict], query: str) -> List[Dict]:
157
  """Re-rank results considering chunk position and metadata relevance."""
158
  for result in results:
 
112
 
113
  return chunks
114
 
115
+ def similarity_search(self, query: str, k: int = 3) -> List[Dict]:
116
+ """Perform similarity search with error handling."""
117
+ try:
118
+ # If no vectors are stored yet, return empty list
119
+ if not self.vectors:
120
+ return []
121
+
 
 
122
  query_vector = self.model.encode(query, convert_to_tensor=True)
 
 
123
  results = []
124
+
125
  for doc in self.vectors:
 
 
 
 
 
 
 
 
 
 
 
 
126
  similarity = util.pytorch_cos_sim(query_vector, doc["vector"]).item()
127
+ results.append({
128
+ "text": doc["text"],
129
+ "metadata": doc["metadata"],
130
+ "score": similarity
131
+ })
132
+
133
+ # Sort by similarity and return top k
134
  results.sort(key=lambda x: x["score"], reverse=True)
135
+ return results[:k]
 
 
 
 
136
 
137
+ except Exception as e:
138
+ st.error(f"Error in similarity search: {str(e)}")
139
+ return []
140
+
141
  def _rerank_results(self, results: List[Dict], query: str) -> List[Dict]:
142
  """Re-rank results considering chunk position and metadata relevance."""
143
  for result in results: