Spaces:

destinyebuka
/

AIDA

Running

App Files Files Community

destinyebuka committed on Dec 23, 2025

Commit

fbe31d6

1 Parent(s): a82bbdb

fyp

Browse files

Files changed (2) hide show

app/ai/services/search_service.py +32 -85
app/routes/search.py +13 -11

app/ai/services/search_service.py CHANGED Viewed

@@ -412,18 +412,7 @@ async def search_mongo_then_qdrant(
 ) -> Tuple[List[Dict], str]:
     """
     Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.
-    Use case: "house close to international school in Calavi"
-    - Step 1: MongoDB filters for Calavi
-    - Step 2: Qdrant semantic search within those results for "close to school"
-    Args:
-        user_query: Original user query for semantic understanding
-        search_params: Extracted parameters (must have location or price)
-        limit: Max results to return
-    Returns:
-        Tuple of (results, inferred_currency)
     """
     logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))
@@ -437,12 +426,11 @@ async def search_mongo_then_qdrant(
     try:
         db = await get_db()
-        # Step 1: Build MongoDB filter for location/price/basic filters
         mongo_query = {"status": "active"}
         if search_params.get("location"):
-            location = search_params["location"]
-            mongo_query["location"] = {"$regex": location, "$options": "i"}
         if search_params.get("min_price"):
             mongo_query["price"] = {"$gte": search_params["min_price"]}
@@ -465,7 +453,7 @@ async def search_mongo_then_qdrant(
         logger.info("MongoDB pre-filter", query=mongo_query)
         # Fetch more results than needed for semantic filtering
-        cursor = db.listings.find(mongo_query).limit(limit * 3)
         mongo_results = []
         async for doc in cursor:
             if "_id" in doc:
@@ -474,29 +462,25 @@ async def search_mongo_then_qdrant(
         logger.info(f"MongoDB returned {len(mongo_results)} candidates")
         if not mongo_results:
-            # No results from MongoDB filter
             currency = "XOF"
             if search_params.get("location"):
                 currency, _ = await infer_currency_from_location(search_params["location"])
             return [], currency
         # Step 2: Use Qdrant to semantically rank these MongoDB results
-        # Get their IDs
         mongo_ids = [doc.get("_id") for doc in mongo_results]
-        # Embed the user query for semantic search
         query_vector = await embed_query(user_query)
         if not query_vector:
-            # If embedding fails, return MongoDB results as-is
-            logger.warning("Embedding failed, returning MongoDB results")
-            currency = "XOF"
-            if search_params.get("location"):
-                currency, _ = await infer_currency_from_location(search_params["location"])
-            return mongo_results[:limit], currency
-        # Search Qdrant but filter to only these MongoDB IDs
         from qdrant_client.models import Filter, FieldCondition, MatchAny
         id_filter = Filter(
@@ -516,7 +500,6 @@ async def search_mongo_then_qdrant(
             with_payload=True
         )
-        # Format results
         final_results = []
         for point in qdrant_results.points:
             listing = dict(point.payload)
@@ -524,13 +507,11 @@ async def search_mongo_then_qdrant(
             listing["_search_strategy"] = "MONGO_THEN_QDRANT"
             final_results.append(listing)
-        logger.info(f"Qdrant semantic ranking returned {len(final_results)} results")
         # Infer currency
         currency = "XOF"
         if search_params.get("location"):
             currency, _ = await infer_currency_from_location(search_params["location"])
         return final_results, currency
     except Exception as e:
@@ -544,96 +525,62 @@ async def search_qdrant_then_mongo(
     limit: int = 10
 ) -> Tuple[List[Dict], str]:
     """
-    Strategy: Semantic search first in Qdrant, then apply MongoDB filters.
-    Use case: "modern luxurious apartment" (semantic primary, filters secondary)
-    - Step 1: Qdrant semantic search for "modern luxurious"
-    - Step 2: Apply MongoDB filters to those results
-    Args:
-        user_query: Original user query for semantic search
-        search_params: Extracted parameters for filtering
-        limit: Max results to return
-    Returns:
-        Tuple of (results, inferred_currency)
     """
     logger.info("QDRANT_THEN_MONGO strategy")
     if not qdrant_client:
-        logger.error("Qdrant client not available")
         return [], "XOF"
     try:
-        # Step 1: Semantic search in Qdrant (get more results for filtering)
         query_vector = await embed_query(user_query)
         if not query_vector:
-            logger.warning("Embedding failed in QDRANT_THEN_MONGO")
             return [], "XOF"
-        # Get semantic results (no filters yet)
         qdrant_results = await qdrant_client.query_points(
             collection_name=COLLECTION_NAME,
             query=query_vector,
-            limit=limit * 3,  # Get more for filtering
             with_payload=True
         )
         candidates = [dict(point.payload) for point in qdrant_results.points]
-        logger.info(f"Qdrant returned {len(candidates)} semantic candidates")
-        # Step 2: Apply MongoDB-style filters in Python
         filtered_results = []
         for listing in candidates:
-            # Check each filter
             if search_params.get("location"):
-                location_filter = search_params["location"].lower()
-                listing_location = listing.get("location", "").lower()
-                if location_filter not in listing_location:
-                    continue
-            if search_params.get("min_price"):
-                if listing.get("price", 0) < search_params["min_price"]:
-                    continue
-            if search_params.get("max_price"):
-                if listing.get("price", float('inf')) > search_params["max_price"]:
-                    continue
-            if search_params.get("bedrooms"):
-                if listing.get("bedrooms", 0) < search_params["bedrooms"]:
-                    continue
-            if search_params.get("bathrooms"):
-                if listing.get("bathrooms", 0) < search_params["bathrooms"]:
                     continue
-            if search_params.get("listing_type"):
-                listing_type_filter = search_params["listing_type"].lower()
-                listing_type = listing.get("listing_type", "").lower()
-                if listing_type_filter not in listing_type:
-                    continue
-            # Passed all filters
-            listing["_search_strategy"] = "QDRANT_THEN_MONGO"
             filtered_results.append(listing)
             if len(filtered_results) >= limit:
                 break
-        logger.info(f"After filtering: {len(filtered_results)} results")
         # Infer currency
         currency = "XOF"
         if search_params.get("location"):
             currency, _ = await infer_currency_from_location(search_params["location"])
         return filtered_results, currency
     except Exception as e:
         logger.error(f"QDRANT_THEN_MONGO error: {e}")
         return [], "XOF"

 ) -> Tuple[List[Dict], str]:
     """
     Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.
+    STRICT MODE: If MongoDB finds nothing for the location, RETURN EMPTY. Do not prompt unrelated listings.
     """
     logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))
     try:
         db = await get_db()
+        # Step 1: Build MongoDB filter
         mongo_query = {"status": "active"}
         if search_params.get("location"):
+            mongo_query["location"] = {"$regex": search_params["location"], "$options": "i"}
         if search_params.get("min_price"):
             mongo_query["price"] = {"$gte": search_params["min_price"]}
         logger.info("MongoDB pre-filter", query=mongo_query)
         # Fetch more results than needed for semantic filtering
+        cursor = db.listings.find(mongo_query).limit(limit * 5)
         mongo_results = []
         async for doc in cursor:
             if "_id" in doc:
         logger.info(f"MongoDB returned {len(mongo_results)} candidates")
+        # STRICT CHECK: If location was requested but nothing found, return empty
         if not mongo_results:
             currency = "XOF"
             if search_params.get("location"):
                 currency, _ = await infer_currency_from_location(search_params["location"])
             return [], currency
         # Step 2: Use Qdrant to semantically rank these MongoDB results
         mongo_ids = [doc.get("_id") for doc in mongo_results]
+        # Embed query
         query_vector = await embed_query(user_query)
         if not query_vector:
+            # Fallback to MongoDB results (they are at least valid matches)
+            # Just return top N by recency (assuming they came sorted by created_at desc)
+            return mongo_results[:limit], "XOF"
+        # Search Qdrant filtering ONLY to these IDs
         from qdrant_client.models import Filter, FieldCondition, MatchAny
         id_filter = Filter(
             with_payload=True
         )
         final_results = []
         for point in qdrant_results.points:
             listing = dict(point.payload)
             listing["_search_strategy"] = "MONGO_THEN_QDRANT"
             final_results.append(listing)
         # Infer currency
         currency = "XOF"
         if search_params.get("location"):
             currency, _ = await infer_currency_from_location(search_params["location"])
         return final_results, currency
     except Exception as e:
     limit: int = 10
 ) -> Tuple[List[Dict], str]:
     """
+    Strategy: Semantic search first, then STRICTLY apply filters in Python.
     """
     logger.info("QDRANT_THEN_MONGO strategy")
     if not qdrant_client:
         return [], "XOF"
     try:
         query_vector = await embed_query(user_query)
         if not query_vector:
             return [], "XOF"
+        # Get purely semantic results
         qdrant_results = await qdrant_client.query_points(
             collection_name=COLLECTION_NAME,
             query=query_vector,
+            limit=limit * 5,  # Fetch more to allow for filtering loss
             with_payload=True
         )
         candidates = [dict(point.payload) for point in qdrant_results.points]
+        # STRICT FILTERING in Python
         filtered_results = []
         for listing in candidates:
+            # Location (partial match, case insensitive)
             if search_params.get("location"):
+                req_loc = search_params["location"].lower()
+                list_loc = listing.get("location", "").lower()
+                if req_loc not in list_loc:
                     continue
+            # Price
+            price = listing.get("price", 0)
+            if search_params.get("min_price") and price < search_params["min_price"]:
+                continue
+            if search_params.get("max_price") and price > search_params["max_price"]:
+                continue
+            # Bedrooms
+            if search_params.get("bedrooms") and listing.get("bedrooms", 0) < search_params["bedrooms"]:
+                continue
             filtered_results.append(listing)
             if len(filtered_results) >= limit:
                 break
         # Infer currency
         currency = "XOF"
         if search_params.get("location"):
             currency, _ = await infer_currency_from_location(search_params["location"])
         return filtered_results, currency
     except Exception as e:
         logger.error(f"QDRANT_THEN_MONGO error: {e}")
         return [], "XOF"

app/routes/search.py CHANGED Viewed

@@ -162,17 +162,19 @@ async def aida_search(
         if strategy == SearchStrategy.MONGO_ONLY:
             results = await search_mongodb(search_params, dto.limit)
-            # Fallback for structured searches if NO results
-            if not results:
-                logger.info("MONGO_ONLY yielded no results, falling back to QDRANT_ONLY as safety net")
-                from app.ai.services.search_service import search_listings_hybrid
-                results, _ = await search_listings_hybrid(
-                    user_query=dto.query,
-                    search_params=search_params,
-                    limit=dto.limit,
-                    mode="relaxed"
-                )
-                strategy = "QDRANT_FALLBACK"
         elif strategy == SearchStrategy.QDRANT_ONLY:
             results, _ = await search_listings_hybrid(

         if strategy == SearchStrategy.MONGO_ONLY:
             results = await search_mongodb(search_params, dto.limit)
+            # STRICT MODE: Do NOT fallback to Qdrant if location was specified but nothing found.
+            # This prevents "House in USA" -> "Found house in Lagos"
+            # We only want fallback for truly vague queries.
+            if not results and not search_params.get("location"):
+               logger.info("MONGO_ONLY yielded no results for general query, falling back to QDRANT_ONLY")
+               from app.ai.services.search_service import search_listings_hybrid
+               results, _ = await search_listings_hybrid(
+                   user_query=dto.query,
+                   search_params=search_params,
+                   limit=dto.limit,
+                   mode="relaxed"
+               )
+               strategy = "QDRANT_FALLBACK"
         elif strategy == SearchStrategy.QDRANT_ONLY:
             results, _ = await search_listings_hybrid(