Spaces:
Running
Running
Update custom_utils.py
Browse files- custom_utils.py +17 -9
custom_utils.py
CHANGED
|
@@ -186,10 +186,6 @@ def vector_search_naive(openai_api_key,
|
|
| 186 |
}
|
| 187 |
}
|
| 188 |
|
| 189 |
-
#remove_embedding_stage = {
|
| 190 |
-
# "$unset": "description_embedding"
|
| 191 |
-
#}
|
| 192 |
-
|
| 193 |
pipeline = [vector_search_stage, get_remove_embedding_stage()]
|
| 194 |
|
| 195 |
return invoke_search(collection, pipeline)
|
|
@@ -222,11 +218,7 @@ def vector_search_advanced(openai_api_key,
|
|
| 222 |
},
|
| 223 |
}
|
| 224 |
}
|
| 225 |
-
|
| 226 |
-
#remove_embedding_stage = {
|
| 227 |
-
# "$unset": "description_embedding"
|
| 228 |
-
#}
|
| 229 |
-
|
| 230 |
pipeline = [vector_search_stage, get_remove_embedding_stage()] + additional_stages
|
| 231 |
|
| 232 |
return invoke_search(collection, pipeline)
|
|
@@ -238,8 +230,24 @@ def get_remove_embedding_stage():
|
|
| 238 |
|
| 239 |
def invoke_search(collection, pipeline):
    """Run ``pipeline`` through ``collection.aggregate`` and return the results.

    Args:
        collection: Object exposing an ``aggregate(pipeline)`` method.
        pipeline: Aggregation stages, passed to ``aggregate`` unchanged.

    Returns:
        A list holding every item yielded by the aggregation.
    """
    return list(collection.aggregate(pipeline))
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
def get_text_embedding(openai_api_key, text):
|
| 244 |
if not text or not isinstance(text, str):
|
| 245 |
return None
|
|
|
|
| 186 |
}
|
| 187 |
}
|
| 188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
pipeline = [vector_search_stage, get_remove_embedding_stage()]
|
| 190 |
|
| 191 |
return invoke_search(collection, pipeline)
|
|
|
|
| 218 |
},
|
| 219 |
}
|
| 220 |
}
|
| 221 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
pipeline = [vector_search_stage, get_remove_embedding_stage()] + additional_stages
|
| 223 |
|
| 224 |
return invoke_search(collection, pipeline)
|
|
|
|
| 230 |
|
| 231 |
def invoke_search(collection, pipeline):
    """Run ``pipeline`` against ``collection`` and return the results as a list.

    Before returning, prints the ``$vectorSearch`` execution time reported by
    ``get_millis_elapsed``. That figure comes from a separate ``explain``
    command, not from the ``aggregate`` call made here.

    Args:
        collection: Object exposing ``aggregate(pipeline)``, ``name`` and
            ``database`` (the last two are used for the ``explain`` command).
        pipeline: Aggregation stages. ``get_millis_elapsed`` expects the
            first stage to be ``$vectorSearch``.

    Returns:
        A list holding every item yielded by the aggregation.
    """
    results = collection.aggregate(pipeline)

    # The helper needs the collection and pipeline passed in explicitly;
    # it has no other way to see this function's locals.
    elapsed = get_millis_elapsed(collection, pipeline)
    print(f"MongoDB execution time: {elapsed} millis")

    return list(results)


def get_millis_elapsed(collection, pipeline):
    """Return ``millisElapsed`` from the explain output of ``pipeline``.

    Issues an ``explain`` command with ``executionStats`` verbosity on the
    database that owns ``collection`` and reads the value reported under the
    first stage's ``$vectorSearch`` entry.

    Args:
        collection: Object exposing ``name`` and ``database``; the database
            must provide ``command``.
        pipeline: Aggregation stages to explain.

    Returns:
        The value stored at
        ``stages[0]["$vectorSearch"]["explain"]["collectStats"]["millisElapsed"]``
        in the explain response.

    Raises:
        KeyError, IndexError: If the explain response does not have that shape.
    """
    # Take the database from the collection instead of relying on a
    # module-level ``db`` that is not visible from this function.
    db = collection.database

    explain_query_execution = db.command(
        "explain",
        {
            "aggregate": collection.name,
            "pipeline": pipeline,
            "cursor": {},
        },
        verbosity='executionStats',
    )

    explain_vector_search = explain_query_execution["stages"][0]["$vectorSearch"]

    return explain_vector_search["explain"]["collectStats"]["millisElapsed"]
|
| 250 |
+
|
| 251 |
def get_text_embedding(openai_api_key, text):
|
| 252 |
if not text or not isinstance(text, str):
|
| 253 |
return None
|