destinyebuka committed on
Commit
fbe31d6
·
1 Parent(s): a82bbdb
app/ai/services/search_service.py CHANGED
@@ -412,18 +412,7 @@ async def search_mongo_then_qdrant(
412
  ) -> Tuple[List[Dict], str]:
413
  """
414
  Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.
415
-
416
- Use case: "house close to international school in Calavi"
417
- - Step 1: MongoDB filters for Calavi
418
- - Step 2: Qdrant semantic search within those results for "close to school"
419
-
420
- Args:
421
- user_query: Original user query for semantic understanding
422
- search_params: Extracted parameters (must have location or price)
423
- limit: Max results to return
424
-
425
- Returns:
426
- Tuple of (results, inferred_currency)
427
  """
428
 
429
  logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))
@@ -437,12 +426,11 @@ async def search_mongo_then_qdrant(
437
  try:
438
  db = await get_db()
439
 
440
- # Step 1: Build MongoDB filter for location/price/basic filters
441
  mongo_query = {"status": "active"}
442
 
443
  if search_params.get("location"):
444
- location = search_params["location"]
445
- mongo_query["location"] = {"$regex": location, "$options": "i"}
446
 
447
  if search_params.get("min_price"):
448
  mongo_query["price"] = {"$gte": search_params["min_price"]}
@@ -465,7 +453,7 @@ async def search_mongo_then_qdrant(
465
  logger.info("MongoDB pre-filter", query=mongo_query)
466
 
467
  # Fetch more results than needed for semantic filtering
468
- cursor = db.listings.find(mongo_query).limit(limit * 3)
469
  mongo_results = []
470
  async for doc in cursor:
471
  if "_id" in doc:
@@ -474,29 +462,25 @@ async def search_mongo_then_qdrant(
474
 
475
  logger.info(f"MongoDB returned {len(mongo_results)} candidates")
476
 
 
477
  if not mongo_results:
478
- # No results from MongoDB filter
479
  currency = "XOF"
480
  if search_params.get("location"):
481
  currency, _ = await infer_currency_from_location(search_params["location"])
482
  return [], currency
483
 
484
  # Step 2: Use Qdrant to semantically rank these MongoDB results
485
- # Get their IDs
486
  mongo_ids = [doc.get("_id") for doc in mongo_results]
487
 
488
- # Embed the user query for semantic search
489
  query_vector = await embed_query(user_query)
490
 
491
  if not query_vector:
492
- # If embedding fails, return MongoDB results as-is
493
- logger.warning("Embedding failed, returning MongoDB results")
494
- currency = "XOF"
495
- if search_params.get("location"):
496
- currency, _ = await infer_currency_from_location(search_params["location"])
497
- return mongo_results[:limit], currency
498
 
499
- # Search Qdrant but filter to only these MongoDB IDs
500
  from qdrant_client.models import Filter, FieldCondition, MatchAny
501
 
502
  id_filter = Filter(
@@ -516,7 +500,6 @@ async def search_mongo_then_qdrant(
516
  with_payload=True
517
  )
518
 
519
- # Format results
520
  final_results = []
521
  for point in qdrant_results.points:
522
  listing = dict(point.payload)
@@ -524,13 +507,11 @@ async def search_mongo_then_qdrant(
524
  listing["_search_strategy"] = "MONGO_THEN_QDRANT"
525
  final_results.append(listing)
526
 
527
- logger.info(f"Qdrant semantic ranking returned {len(final_results)} results")
528
-
529
  # Infer currency
530
  currency = "XOF"
531
  if search_params.get("location"):
532
  currency, _ = await infer_currency_from_location(search_params["location"])
533
-
534
  return final_results, currency
535
 
536
  except Exception as e:
@@ -544,96 +525,62 @@ async def search_qdrant_then_mongo(
544
  limit: int = 10
545
  ) -> Tuple[List[Dict], str]:
546
  """
547
- Strategy: Semantic search first in Qdrant, then apply MongoDB filters.
548
-
549
- Use case: "modern luxurious apartment" (semantic primary, filters secondary)
550
- - Step 1: Qdrant semantic search for "modern luxurious"
551
- - Step 2: Apply MongoDB filters to those results
552
-
553
- Args:
554
- user_query: Original user query for semantic search
555
- search_params: Extracted parameters for filtering
556
- limit: Max results to return
557
-
558
- Returns:
559
- Tuple of (results, inferred_currency)
560
  """
561
 
562
  logger.info("QDRANT_THEN_MONGO strategy")
563
 
564
  if not qdrant_client:
565
- logger.error("Qdrant client not available")
566
  return [], "XOF"
567
 
568
  try:
569
- # Step 1: Semantic search in Qdrant (get more results for filtering)
570
  query_vector = await embed_query(user_query)
571
-
572
  if not query_vector:
573
- logger.warning("Embedding failed in QDRANT_THEN_MONGO")
574
  return [], "XOF"
575
 
576
- # Get semantic results (no filters yet)
577
  qdrant_results = await qdrant_client.query_points(
578
  collection_name=COLLECTION_NAME,
579
  query=query_vector,
580
- limit=limit * 3, # Get more for filtering
581
  with_payload=True
582
  )
583
 
584
  candidates = [dict(point.payload) for point in qdrant_results.points]
585
 
586
- logger.info(f"Qdrant returned {len(candidates)} semantic candidates")
587
-
588
- # Step 2: Apply MongoDB-style filters in Python
589
  filtered_results = []
590
 
591
  for listing in candidates:
592
- # Check each filter
593
  if search_params.get("location"):
594
- location_filter = search_params["location"].lower()
595
- listing_location = listing.get("location", "").lower()
596
- if location_filter not in listing_location:
597
- continue
598
-
599
- if search_params.get("min_price"):
600
- if listing.get("price", 0) < search_params["min_price"]:
601
- continue
602
-
603
- if search_params.get("max_price"):
604
- if listing.get("price", float('inf')) > search_params["max_price"]:
605
- continue
606
-
607
- if search_params.get("bedrooms"):
608
- if listing.get("bedrooms", 0) < search_params["bedrooms"]:
609
- continue
610
-
611
- if search_params.get("bathrooms"):
612
- if listing.get("bathrooms", 0) < search_params["bathrooms"]:
613
  continue
614
 
615
- if search_params.get("listing_type"):
616
- listing_type_filter = search_params["listing_type"].lower()
617
- listing_type = listing.get("listing_type", "").lower()
618
- if listing_type_filter not in listing_type:
619
- continue
620
-
621
- # Passed all filters
622
- listing["_search_strategy"] = "QDRANT_THEN_MONGO"
 
 
 
623
  filtered_results.append(listing)
624
-
625
  if len(filtered_results) >= limit:
626
  break
627
 
628
- logger.info(f"After filtering: {len(filtered_results)} results")
629
-
630
  # Infer currency
631
  currency = "XOF"
632
  if search_params.get("location"):
633
  currency, _ = await infer_currency_from_location(search_params["location"])
634
-
635
  return filtered_results, currency
636
-
637
  except Exception as e:
638
  logger.error(f"QDRANT_THEN_MONGO error: {e}")
639
  return [], "XOF"
 
412
  ) -> Tuple[List[Dict], str]:
413
  """
414
  Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.
415
+ STRICT MODE: If MongoDB finds nothing for the requested filters, RETURN EMPTY. Do not return unrelated listings.
 
 
 
 
 
 
 
 
 
 
 
416
  """
417
 
418
  logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))
 
426
  try:
427
  db = await get_db()
428
 
429
+ # Step 1: Build MongoDB filter
430
  mongo_query = {"status": "active"}
431
 
432
  if search_params.get("location"):
433
+ mongo_query["location"] = {"$regex": search_params["location"], "$options": "i"}
 
434
 
435
  if search_params.get("min_price"):
436
  mongo_query["price"] = {"$gte": search_params["min_price"]}
 
453
  logger.info("MongoDB pre-filter", query=mongo_query)
454
 
455
  # Fetch more results than needed for semantic filtering
456
+ cursor = db.listings.find(mongo_query).limit(limit * 5)
457
  mongo_results = []
458
  async for doc in cursor:
459
  if "_id" in doc:
 
462
 
463
  logger.info(f"MongoDB returned {len(mongo_results)} candidates")
464
 
465
+ # STRICT CHECK: If the MongoDB pre-filter found nothing, return empty (no semantic fallback)
466
  if not mongo_results:
 
467
  currency = "XOF"
468
  if search_params.get("location"):
469
  currency, _ = await infer_currency_from_location(search_params["location"])
470
  return [], currency
471
 
472
  # Step 2: Use Qdrant to semantically rank these MongoDB results
 
473
  mongo_ids = [doc.get("_id") for doc in mongo_results]
474
 
475
+ # Embed query
476
  query_vector = await embed_query(user_query)
477
 
478
  if not query_vector:
479
+ # Fallback to MongoDB results (they are at least valid matches)
480
+ # Just return the first N in cursor order (NOTE: the find() above applies no sort, so recency is not guaranteed)
481
+ return mongo_results[:limit], "XOF"
 
 
 
482
 
483
+ # Search Qdrant filtering ONLY to these IDs
484
  from qdrant_client.models import Filter, FieldCondition, MatchAny
485
 
486
  id_filter = Filter(
 
500
  with_payload=True
501
  )
502
 
 
503
  final_results = []
504
  for point in qdrant_results.points:
505
  listing = dict(point.payload)
 
507
  listing["_search_strategy"] = "MONGO_THEN_QDRANT"
508
  final_results.append(listing)
509
 
 
 
510
  # Infer currency
511
  currency = "XOF"
512
  if search_params.get("location"):
513
  currency, _ = await infer_currency_from_location(search_params["location"])
514
+
515
  return final_results, currency
516
 
517
  except Exception as e:
 
525
  limit: int = 10
526
  ) -> Tuple[List[Dict], str]:
527
  """
528
+ Strategy: Semantic search first, then STRICTLY apply filters in Python.
 
 
 
 
 
 
 
 
 
 
 
 
529
  """
530
 
531
  logger.info("QDRANT_THEN_MONGO strategy")
532
 
533
  if not qdrant_client:
 
534
  return [], "XOF"
535
 
536
  try:
 
537
  query_vector = await embed_query(user_query)
 
538
  if not query_vector:
 
539
  return [], "XOF"
540
 
541
+ # Get purely semantic results
542
  qdrant_results = await qdrant_client.query_points(
543
  collection_name=COLLECTION_NAME,
544
  query=query_vector,
545
+ limit=limit * 5, # Fetch more to allow for filtering loss
546
  with_payload=True
547
  )
548
 
549
  candidates = [dict(point.payload) for point in qdrant_results.points]
550
 
551
+ # STRICT FILTERING in Python
 
 
552
  filtered_results = []
553
 
554
  for listing in candidates:
555
+ # Location (partial match, case insensitive)
556
  if search_params.get("location"):
557
+ req_loc = search_params["location"].lower()
558
+ list_loc = listing.get("location", "").lower()
559
+ if req_loc not in list_loc:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
560
  continue
561
 
562
+ # Price
563
+ price = listing.get("price", 0)
564
+ if search_params.get("min_price") and price < search_params["min_price"]:
565
+ continue
566
+ if search_params.get("max_price") and price > search_params["max_price"]:
567
+ continue
568
+
569
+ # Bedrooms
570
+ if search_params.get("bedrooms") and listing.get("bedrooms", 0) < search_params["bedrooms"]:
571
+ continue
572
+
573
  filtered_results.append(listing)
 
574
  if len(filtered_results) >= limit:
575
  break
576
 
 
 
577
  # Infer currency
578
  currency = "XOF"
579
  if search_params.get("location"):
580
  currency, _ = await infer_currency_from_location(search_params["location"])
581
+
582
  return filtered_results, currency
583
+
584
  except Exception as e:
585
  logger.error(f"QDRANT_THEN_MONGO error: {e}")
586
  return [], "XOF"
app/routes/search.py CHANGED
@@ -162,17 +162,19 @@ async def aida_search(
162
  if strategy == SearchStrategy.MONGO_ONLY:
163
  results = await search_mongodb(search_params, dto.limit)
164
 
165
- # Fallback for structured searches if NO results
166
- if not results:
167
- logger.info("MONGO_ONLY yielded no results, falling back to QDRANT_ONLY as safety net")
168
- from app.ai.services.search_service import search_listings_hybrid
169
- results, _ = await search_listings_hybrid(
170
- user_query=dto.query,
171
- search_params=search_params,
172
- limit=dto.limit,
173
- mode="relaxed"
174
- )
175
- strategy = "QDRANT_FALLBACK"
 
 
176
 
177
  elif strategy == SearchStrategy.QDRANT_ONLY:
178
  results, _ = await search_listings_hybrid(
 
162
  if strategy == SearchStrategy.MONGO_ONLY:
163
  results = await search_mongodb(search_params, dto.limit)
164
 
165
+ # STRICT MODE: Do NOT fall back to Qdrant if a location was specified but nothing was found.
166
+ # This prevents "House in USA" -> "Found house in Lagos"
167
+ # We only want the fallback for queries that specify no location.
168
+ if not results and not search_params.get("location"):
169
+ logger.info("MONGO_ONLY yielded no results for general query, falling back to QDRANT_ONLY")
170
+ from app.ai.services.search_service import search_listings_hybrid
171
+ results, _ = await search_listings_hybrid(
172
+ user_query=dto.query,
173
+ search_params=search_params,
174
+ limit=dto.limit,
175
+ mode="relaxed"
176
+ )
177
+ strategy = "QDRANT_FALLBACK"
178
 
179
  elif strategy == SearchStrategy.QDRANT_ONLY:
180
  results, _ = await search_listings_hybrid(