Spaces:
Running
Running
Update custom_utils.py
Browse files
- custom_utils.py +11 -16
custom_utils.py
CHANGED
|
@@ -55,9 +55,9 @@ def rag_retrieval_advanced(openai_api_key,
|
|
| 55 |
# 2) Weighted average review, sorted in descending order
|
| 56 |
|
| 57 |
additional_stages = [
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
]
|
| 62 |
|
| 63 |
retrieval_result = vector_search_advanced(
|
|
@@ -139,7 +139,7 @@ def vector_search_naive(openai_api_key,
|
|
| 139 |
}
|
| 140 |
}
|
| 141 |
|
| 142 |
-
pipeline = [vector_search_stage,
|
| 143 |
|
| 144 |
return invoke_search(db, collection, pipeline)
|
| 145 |
|
|
@@ -172,24 +172,20 @@ def vector_search_advanced(openai_api_key,
|
|
| 172 |
}
|
| 173 |
}
|
| 174 |
|
| 175 |
-
pipeline = [vector_search_and_filter_stage,
|
| 176 |
|
| 177 |
return invoke_search(db, collection, pipeline)
|
| 178 |
|
| 179 |
-
def
|
| 180 |
return {
|
| 181 |
"$unset": "description_embedding"
|
| 182 |
}
|
| 183 |
|
| 184 |
-
def get_project_fields_stage():
|
| 185 |
return {
|
| 186 |
"$project": {
|
| 187 |
-
"_id": 1,
|
| 188 |
"id": 1,
|
| 189 |
"listing_url": 1,
|
| 190 |
-
"scrape_id": 1,
|
| 191 |
-
"last_scraped": 1,
|
| 192 |
-
"source": 1,
|
| 193 |
"name": 1,
|
| 194 |
"description": 1,
|
| 195 |
"neighborhood_overview": 1,
|
|
@@ -240,7 +236,6 @@ def get_project_fields_stage():
|
|
| 240 |
"availability_60": 1,
|
| 241 |
"availability_90": 1,
|
| 242 |
"availability_365": 1,
|
| 243 |
-
"calendar_last_scraped": 1,
|
| 244 |
"number_of_reviews": 1,
|
| 245 |
"number_of_reviews_ltm": 1,
|
| 246 |
"number_of_reviews_l30d": 1,
|
|
@@ -263,7 +258,7 @@ def get_project_fields_stage():
|
|
| 263 |
}
|
| 264 |
}
|
| 265 |
|
| 266 |
-
def get_filter_result_stage():
|
| 267 |
return {
|
| 268 |
"$match": {
|
| 269 |
"accommodates": { "$eq": 2},
|
|
@@ -271,7 +266,7 @@ def get_filter_result_stage():
|
|
| 271 |
}
|
| 272 |
}
|
| 273 |
|
| 274 |
-
def get_average_review_and_review_count_stage():
|
| 275 |
return {
|
| 276 |
"$addFields": {
|
| 277 |
"averageReview": {
|
|
@@ -294,7 +289,7 @@ def get_average_review_and_review_count_stage():
|
|
| 294 |
}
|
| 295 |
}
|
| 296 |
|
| 297 |
-
def get_weighting_stage():
|
| 298 |
return {
|
| 299 |
"$addFields": {
|
| 300 |
"weightedAverageReview": {
|
|
@@ -306,7 +301,7 @@ def get_weighting_stage():
|
|
| 306 |
}
|
| 307 |
}
|
| 308 |
|
| 309 |
-
def
|
| 310 |
return {
|
| 311 |
"$sort": {"weightedAverageReview": -1}
|
| 312 |
}
|
|
|
|
| 55 |
# 2) Weighted average review, sorted in descending order
|
| 56 |
|
| 57 |
additional_stages = [
|
| 58 |
+
get_stage_average_review_and_review_count(),
|
| 59 |
+
get_stage_weighting(),
|
| 60 |
+
get_stage_sorting()
|
| 61 |
]
|
| 62 |
|
| 63 |
retrieval_result = vector_search_advanced(
|
|
|
|
| 139 |
}
|
| 140 |
}
|
| 141 |
|
| 142 |
+
pipeline = [vector_search_stage, get_stage_include_fields()]
|
| 143 |
|
| 144 |
return invoke_search(db, collection, pipeline)
|
| 145 |
|
|
|
|
| 172 |
}
|
| 173 |
}
|
| 174 |
|
| 175 |
+
pipeline = [vector_search_and_filter_stage, get_stage_include_fields()] + additional_stages
|
| 176 |
|
| 177 |
return invoke_search(db, collection, pipeline)
|
| 178 |
|
| 179 |
+
def get_stage_exclude_fields():
|
| 180 |
return {
|
| 181 |
"$unset": "description_embedding"
|
| 182 |
}
|
| 183 |
|
| 184 |
+
def get_stage_include_fields():
|
| 185 |
return {
|
| 186 |
"$project": {
|
|
|
|
| 187 |
"id": 1,
|
| 188 |
"listing_url": 1,
|
|
|
|
|
|
|
|
|
|
| 189 |
"name": 1,
|
| 190 |
"description": 1,
|
| 191 |
"neighborhood_overview": 1,
|
|
|
|
| 236 |
"availability_60": 1,
|
| 237 |
"availability_90": 1,
|
| 238 |
"availability_365": 1,
|
|
|
|
| 239 |
"number_of_reviews": 1,
|
| 240 |
"number_of_reviews_ltm": 1,
|
| 241 |
"number_of_reviews_l30d": 1,
|
|
|
|
| 258 |
}
|
| 259 |
}
|
| 260 |
|
| 261 |
+
def get_stage_filter_result():
|
| 262 |
return {
|
| 263 |
"$match": {
|
| 264 |
"accommodates": { "$eq": 2},
|
|
|
|
| 266 |
}
|
| 267 |
}
|
| 268 |
|
| 269 |
+
def get_stage_average_review_and_review_count():
|
| 270 |
return {
|
| 271 |
"$addFields": {
|
| 272 |
"averageReview": {
|
|
|
|
| 289 |
}
|
| 290 |
}
|
| 291 |
|
| 292 |
+
def get_stage_weighting():
|
| 293 |
return {
|
| 294 |
"$addFields": {
|
| 295 |
"weightedAverageReview": {
|
|
|
|
| 301 |
}
|
| 302 |
}
|
| 303 |
|
| 304 |
+
def get_stage_sorting():
|
| 305 |
return {
|
| 306 |
"$sort": {"weightedAverageReview": -1}
|
| 307 |
}
|