Spaces:
Running
Running
Commit
·
fbe31d6
1
Parent(s):
a82bbdb
fyp
Browse files- app/ai/services/search_service.py +32 -85
- app/routes/search.py +13 -11
app/ai/services/search_service.py
CHANGED
|
@@ -412,18 +412,7 @@ async def search_mongo_then_qdrant(
|
|
| 412 |
) -> Tuple[List[Dict], str]:
|
| 413 |
"""
|
| 414 |
Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.
|
| 415 |
-
|
| 416 |
-
Use case: "house close to international school in Calavi"
|
| 417 |
-
- Step 1: MongoDB filters for Calavi
|
| 418 |
-
- Step 2: Qdrant semantic search within those results for "close to school"
|
| 419 |
-
|
| 420 |
-
Args:
|
| 421 |
-
user_query: Original user query for semantic understanding
|
| 422 |
-
search_params: Extracted parameters (must have location or price)
|
| 423 |
-
limit: Max results to return
|
| 424 |
-
|
| 425 |
-
Returns:
|
| 426 |
-
Tuple of (results, inferred_currency)
|
| 427 |
"""
|
| 428 |
|
| 429 |
logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))
|
|
@@ -437,12 +426,11 @@ async def search_mongo_then_qdrant(
|
|
| 437 |
try:
|
| 438 |
db = await get_db()
|
| 439 |
|
| 440 |
-
# Step 1: Build MongoDB filter
|
| 441 |
mongo_query = {"status": "active"}
|
| 442 |
|
| 443 |
if search_params.get("location"):
|
| 444 |
-
location = search_params["location"]
|
| 445 |
-
mongo_query["location"] = {"$regex": location, "$options": "i"}
|
| 446 |
|
| 447 |
if search_params.get("min_price"):
|
| 448 |
mongo_query["price"] = {"$gte": search_params["min_price"]}
|
|
@@ -465,7 +453,7 @@ async def search_mongo_then_qdrant(
|
|
| 465 |
logger.info("MongoDB pre-filter", query=mongo_query)
|
| 466 |
|
| 467 |
# Fetch more results than needed for semantic filtering
|
| 468 |
-
cursor = db.listings.find(mongo_query).limit(limit *
|
| 469 |
mongo_results = []
|
| 470 |
async for doc in cursor:
|
| 471 |
if "_id" in doc:
|
|
@@ -474,29 +462,25 @@ async def search_mongo_then_qdrant(
|
|
| 474 |
|
| 475 |
logger.info(f"MongoDB returned {len(mongo_results)} candidates")
|
| 476 |
|
|
|
|
| 477 |
if not mongo_results:
|
| 478 |
-
# No results from MongoDB filter
|
| 479 |
currency = "XOF"
|
| 480 |
if search_params.get("location"):
|
| 481 |
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 482 |
return [], currency
|
| 483 |
|
| 484 |
# Step 2: Use Qdrant to semantically rank these MongoDB results
|
| 485 |
-
# Get their IDs
|
| 486 |
mongo_ids = [doc.get("_id") for doc in mongo_results]
|
| 487 |
|
| 488 |
-
# Embed
|
| 489 |
query_vector = await embed_query(user_query)
|
| 490 |
|
| 491 |
if not query_vector:
|
| 492 |
-
#
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
if search_params.get("location"):
|
| 496 |
-
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 497 |
-
return mongo_results[:limit], currency
|
| 498 |
|
| 499 |
-
# Search Qdrant
|
| 500 |
from qdrant_client.models import Filter, FieldCondition, MatchAny
|
| 501 |
|
| 502 |
id_filter = Filter(
|
|
@@ -516,7 +500,6 @@ async def search_mongo_then_qdrant(
|
|
| 516 |
with_payload=True
|
| 517 |
)
|
| 518 |
|
| 519 |
-
# Format results
|
| 520 |
final_results = []
|
| 521 |
for point in qdrant_results.points:
|
| 522 |
listing = dict(point.payload)
|
|
@@ -524,13 +507,11 @@ async def search_mongo_then_qdrant(
|
|
| 524 |
listing["_search_strategy"] = "MONGO_THEN_QDRANT"
|
| 525 |
final_results.append(listing)
|
| 526 |
|
| 527 |
-
logger.info(f"Qdrant semantic ranking returned {len(final_results)} results")
|
| 528 |
-
|
| 529 |
# Infer currency
|
| 530 |
currency = "XOF"
|
| 531 |
if search_params.get("location"):
|
| 532 |
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 533 |
-
|
| 534 |
return final_results, currency
|
| 535 |
|
| 536 |
except Exception as e:
|
|
@@ -544,96 +525,62 @@ async def search_qdrant_then_mongo(
|
|
| 544 |
limit: int = 10
|
| 545 |
) -> Tuple[List[Dict], str]:
|
| 546 |
"""
|
| 547 |
-
Strategy: Semantic search first
|
| 548 |
-
|
| 549 |
-
Use case: "modern luxurious apartment" (semantic primary, filters secondary)
|
| 550 |
-
- Step 1: Qdrant semantic search for "modern luxurious"
|
| 551 |
-
- Step 2: Apply MongoDB filters to those results
|
| 552 |
-
|
| 553 |
-
Args:
|
| 554 |
-
user_query: Original user query for semantic search
|
| 555 |
-
search_params: Extracted parameters for filtering
|
| 556 |
-
limit: Max results to return
|
| 557 |
-
|
| 558 |
-
Returns:
|
| 559 |
-
Tuple of (results, inferred_currency)
|
| 560 |
"""
|
| 561 |
|
| 562 |
logger.info("QDRANT_THEN_MONGO strategy")
|
| 563 |
|
| 564 |
if not qdrant_client:
|
| 565 |
-
logger.error("Qdrant client not available")
|
| 566 |
return [], "XOF"
|
| 567 |
|
| 568 |
try:
|
| 569 |
-
# Step 1: Semantic search in Qdrant (get more results for filtering)
|
| 570 |
query_vector = await embed_query(user_query)
|
| 571 |
-
|
| 572 |
if not query_vector:
|
| 573 |
-
logger.warning("Embedding failed in QDRANT_THEN_MONGO")
|
| 574 |
return [], "XOF"
|
| 575 |
|
| 576 |
-
# Get semantic results
|
| 577 |
qdrant_results = await qdrant_client.query_points(
|
| 578 |
collection_name=COLLECTION_NAME,
|
| 579 |
query=query_vector,
|
| 580 |
-
limit=limit *
|
| 581 |
with_payload=True
|
| 582 |
)
|
| 583 |
|
| 584 |
candidates = [dict(point.payload) for point in qdrant_results.points]
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
# Step 2: Apply MongoDB-style filters in Python
|
| 589 |
filtered_results = []
|
| 590 |
|
| 591 |
for listing in candidates:
|
| 592 |
-
#
|
| 593 |
if search_params.get("location"):
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
if
|
| 597 |
-
continue
|
| 598 |
-
|
| 599 |
-
if search_params.get("min_price"):
|
| 600 |
-
if listing.get("price", 0) < search_params["min_price"]:
|
| 601 |
-
continue
|
| 602 |
-
|
| 603 |
-
if search_params.get("max_price"):
|
| 604 |
-
if listing.get("price", float('inf')) > search_params["max_price"]:
|
| 605 |
-
continue
|
| 606 |
-
|
| 607 |
-
if search_params.get("bedrooms"):
|
| 608 |
-
if listing.get("bedrooms", 0) < search_params["bedrooms"]:
|
| 609 |
-
continue
|
| 610 |
-
|
| 611 |
-
if search_params.get("bathrooms"):
|
| 612 |
-
if listing.get("bathrooms", 0) < search_params["bathrooms"]:
|
| 613 |
continue
|
| 614 |
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
|
|
|
|
|
|
|
|
|
| 623 |
filtered_results.append(listing)
|
| 624 |
-
|
| 625 |
if len(filtered_results) >= limit:
|
| 626 |
break
|
| 627 |
|
| 628 |
-
logger.info(f"After filtering: {len(filtered_results)} results")
|
| 629 |
-
|
| 630 |
# Infer currency
|
| 631 |
currency = "XOF"
|
| 632 |
if search_params.get("location"):
|
| 633 |
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 634 |
-
|
| 635 |
return filtered_results, currency
|
| 636 |
-
|
| 637 |
except Exception as e:
|
| 638 |
logger.error(f"QDRANT_THEN_MONGO error: {e}")
|
| 639 |
return [], "XOF"
|
|
|
|
| 412 |
) -> Tuple[List[Dict], str]:
|
| 413 |
"""
|
| 414 |
Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.
|
| 415 |
+
STRICT MODE: If the MongoDB pre-filter matches nothing, return an empty list. Do not return unrelated listings.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
"""
|
| 417 |
|
| 418 |
logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))
|
|
|
|
| 426 |
try:
|
| 427 |
db = await get_db()
|
| 428 |
|
| 429 |
+
# Step 1: Build MongoDB filter
|
| 430 |
mongo_query = {"status": "active"}
|
| 431 |
|
| 432 |
if search_params.get("location"):
|
| 433 |
+
mongo_query["location"] = {"$regex": search_params["location"], "$options": "i"}
|
|
|
|
| 434 |
|
| 435 |
if search_params.get("min_price"):
|
| 436 |
mongo_query["price"] = {"$gte": search_params["min_price"]}
|
|
|
|
| 453 |
logger.info("MongoDB pre-filter", query=mongo_query)
|
| 454 |
|
| 455 |
# Fetch more results than needed for semantic filtering
|
| 456 |
+
cursor = db.listings.find(mongo_query).limit(limit * 5)
|
| 457 |
mongo_results = []
|
| 458 |
async for doc in cursor:
|
| 459 |
if "_id" in doc:
|
|
|
|
| 462 |
|
| 463 |
logger.info(f"MongoDB returned {len(mongo_results)} candidates")
|
| 464 |
|
| 465 |
+
# STRICT CHECK: If the MongoDB pre-filter matched nothing, return empty (no fallback to unrelated listings)
|
| 466 |
if not mongo_results:
|
|
|
|
| 467 |
currency = "XOF"
|
| 468 |
if search_params.get("location"):
|
| 469 |
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 470 |
return [], currency
|
| 471 |
|
| 472 |
# Step 2: Use Qdrant to semantically rank these MongoDB results
|
|
|
|
| 473 |
mongo_ids = [doc.get("_id") for doc in mongo_results]
|
| 474 |
|
| 475 |
+
# Embed query
|
| 476 |
query_vector = await embed_query(user_query)
|
| 477 |
|
| 478 |
if not query_vector:
|
| 479 |
+
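# Fall back to the MongoDB results (they already satisfy the hard filters). NOTE(review): this path returns the default "XOF" rather than inferring the currency from the location like the other return paths - confirm this is intended.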
|
| 480 |
+
# Return the first N candidates in the order MongoDB returned them (no explicit sort is visible on the find() above, so recency ordering is not guaranteed - TODO confirm)
|
| 481 |
+
return mongo_results[:limit], "XOF"
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
+
# Search Qdrant filtering ONLY to these IDs
|
| 484 |
from qdrant_client.models import Filter, FieldCondition, MatchAny
|
| 485 |
|
| 486 |
id_filter = Filter(
|
|
|
|
| 500 |
with_payload=True
|
| 501 |
)
|
| 502 |
|
|
|
|
| 503 |
final_results = []
|
| 504 |
for point in qdrant_results.points:
|
| 505 |
listing = dict(point.payload)
|
|
|
|
| 507 |
listing["_search_strategy"] = "MONGO_THEN_QDRANT"
|
| 508 |
final_results.append(listing)
|
| 509 |
|
|
|
|
|
|
|
| 510 |
# Infer currency
|
| 511 |
currency = "XOF"
|
| 512 |
if search_params.get("location"):
|
| 513 |
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 514 |
+
|
| 515 |
return final_results, currency
|
| 516 |
|
| 517 |
except Exception as e:
|
|
|
|
| 525 |
limit: int = 10
|
| 526 |
) -> Tuple[List[Dict], str]:
|
| 527 |
"""
|
| 528 |
+
Strategy: Semantic search first, then STRICTLY apply filters in Python.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
"""
|
| 530 |
|
| 531 |
logger.info("QDRANT_THEN_MONGO strategy")
|
| 532 |
|
| 533 |
if not qdrant_client:
|
|
|
|
| 534 |
return [], "XOF"
|
| 535 |
|
| 536 |
try:
|
|
|
|
| 537 |
query_vector = await embed_query(user_query)
|
|
|
|
| 538 |
if not query_vector:
|
|
|
|
| 539 |
return [], "XOF"
|
| 540 |
|
| 541 |
+
# Get purely semantic results
|
| 542 |
qdrant_results = await qdrant_client.query_points(
|
| 543 |
collection_name=COLLECTION_NAME,
|
| 544 |
query=query_vector,
|
| 545 |
+
limit=limit * 5, # Fetch more to allow for filtering loss
|
| 546 |
with_payload=True
|
| 547 |
)
|
| 548 |
|
| 549 |
candidates = [dict(point.payload) for point in qdrant_results.points]
|
| 550 |
|
| 551 |
+
# STRICT FILTERING in Python
|
|
|
|
|
|
|
| 552 |
filtered_results = []
|
| 553 |
|
| 554 |
for listing in candidates:
|
| 555 |
+
# Location (partial match, case insensitive)
|
| 556 |
if search_params.get("location"):
|
| 557 |
+
req_loc = search_params["location"].lower()
|
| 558 |
+
list_loc = listing.get("location", "").lower()
|
| 559 |
+
if req_loc not in list_loc:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
continue
|
| 561 |
|
| 562 |
+
# Price
|
| 563 |
+
price = listing.get("price", 0)
|
| 564 |
+
if search_params.get("min_price") and price < search_params["min_price"]:
|
| 565 |
+
continue
|
| 566 |
+
if search_params.get("max_price") and price > search_params["max_price"]:
|
| 567 |
+
continue
|
| 568 |
+
|
| 569 |
+
# Bedrooms
|
| 570 |
+
if search_params.get("bedrooms") and listing.get("bedrooms", 0) < search_params["bedrooms"]:
|
| 571 |
+
continue
|
| 572 |
+
|
| 573 |
filtered_results.append(listing)
|
|
|
|
| 574 |
if len(filtered_results) >= limit:
|
| 575 |
break
|
| 576 |
|
|
|
|
|
|
|
| 577 |
# Infer currency
|
| 578 |
currency = "XOF"
|
| 579 |
if search_params.get("location"):
|
| 580 |
currency, _ = await infer_currency_from_location(search_params["location"])
|
| 581 |
+
|
| 582 |
return filtered_results, currency
|
| 583 |
+
|
| 584 |
except Exception as e:
|
| 585 |
logger.error(f"QDRANT_THEN_MONGO error: {e}")
|
| 586 |
return [], "XOF"
|
app/routes/search.py
CHANGED
|
@@ -162,17 +162,19 @@ async def aida_search(
|
|
| 162 |
if strategy == SearchStrategy.MONGO_ONLY:
|
| 163 |
results = await search_mongodb(search_params, dto.limit)
|
| 164 |
|
| 165 |
-
#
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
| 176 |
|
| 177 |
elif strategy == SearchStrategy.QDRANT_ONLY:
|
| 178 |
results, _ = await search_listings_hybrid(
|
|
|
|
| 162 |
if strategy == SearchStrategy.MONGO_ONLY:
|
| 163 |
results = await search_mongodb(search_params, dto.limit)
|
| 164 |
|
| 165 |
+
# STRICT MODE: Do NOT fall back to Qdrant if a location was specified but nothing was found.
|
| 166 |
+
# This prevents "House in USA" -> "Found house in Lagos"
|
| 167 |
+
# We only want the fallback for queries that specify no location.
|
| 168 |
+
if not results and not search_params.get("location"):
|
| 169 |
+
logger.info("MONGO_ONLY yielded no results for general query, falling back to QDRANT_ONLY")
|
| 170 |
+
from app.ai.services.search_service import search_listings_hybrid
|
| 171 |
+
results, _ = await search_listings_hybrid(
|
| 172 |
+
user_query=dto.query,
|
| 173 |
+
search_params=search_params,
|
| 174 |
+
limit=dto.limit,
|
| 175 |
+
mode="relaxed"
|
| 176 |
+
)
|
| 177 |
+
strategy = "QDRANT_FALLBACK"
|
| 178 |
|
| 179 |
elif strategy == SearchStrategy.QDRANT_ONLY:
|
| 180 |
results, _ = await search_listings_hybrid(
|