destinyebuka committed on
Commit
a82bbdb
·
1 Parent(s): 1f88998
app/ai/agent/nodes/search_query.py CHANGED
@@ -1,367 +1,195 @@
1
  # app/ai/agent/nodes/search_query.py
2
  """
3
  Node: Process search queries and return matching listings.
4
- HYBRID SEARCH: Uses Qdrant vector search + payload filters for intelligent NLP-based search.
5
  """
6
 
7
- import json
8
- import re
9
  from structlog import get_logger
10
- from langchain_openai import ChatOpenAI
11
- from langchain_core.messages import SystemMessage, HumanMessage
12
 
13
  from app.ai.agent.state import AgentState, FlowState
14
- from app.ai.agent.validators import JSONValidator
15
  from app.database import get_db
16
- from app.config import settings
17
- from app.ai.services.search_service import search_listings_hybrid, infer_currency_from_location
18
 
19
- logger = get_logger(__name__)
20
-
21
- # Initialize LLM for search parameter extraction
22
- llm = ChatOpenAI(
23
- api_key=settings.DEEPSEEK_API_KEY,
24
- base_url=settings.DEEPSEEK_BASE_URL,
25
- model="deepseek-chat",
26
- temperature=0.3,
 
 
27
  )
28
 
29
- SEARCH_EXTRACTION_PROMPT = """You are extracting search criteria from a natural language property search query.
30
-
31
- User message: "{user_message}"
32
-
33
- Extract ONLY what is EXPLICITLY mentioned (set to null if not clearly stated):
34
- - location: City/area/neighborhood name (e.g., "Calavi", "Lagos", "Cotonou", "Victoria Island") or null
35
- - min_price: Minimum price as number or null
36
- - max_price: Maximum price as number or null (interpret "20k" as 20000, "of 20k" as max_price: 20000)
37
- - bedrooms: Minimum number of bedrooms or null
38
- - bathrooms: Minimum number of bathrooms or null
39
- - listing_type: ONLY if explicitly stated. Options: "rent", "short-stay", "sale", "roommate". Set to null otherwise.
40
- - price_type: Payment frequency or null. Options: "monthly", "weekly", "nightly", "yearly"
41
- - amenities: List of desired features (e.g., ["wifi", "balcony", "parking"]) or []
42
-
43
- IMPORTANT RULES:
44
- - Do NOT infer listing_type from words like "house", "apartment", "room" - these are property types, not listing types
45
- - ONLY set listing_type if user explicitly says "for rent", "to buy", "for sale", "short stay", "roommate"
46
- - "I want a house of 20k in Cotonou" → listing_type: null (not mentioned)
47
- - "I want to rent a house" → listing_type: "rent" (explicitly mentioned)
48
- - "House for sale in Lagos" → listing_type: "sale" (explicitly mentioned)
49
-
50
- Price understanding:
51
- - "50k" or "50K" = 50000
52
- - "of 20k" or "for 20k" = max_price: 20000
53
- - "under 50k" or "less than 50k" = max_price: 50000
54
- - "around 80k" = min_price: 70000, max_price: 90000
55
- - "per month" = price_type: "monthly"
56
- - "per night" = price_type: "nightly"
57
-
58
- Return ONLY valid JSON:
59
- {{
60
- "location": string or null,
61
- "min_price": number or null,
62
- "max_price": number or null,
63
- "bedrooms": integer or null,
64
- "bathrooms": integer or null,
65
- "listing_type": string or null,
66
- "price_type": string or null,
67
- "amenities": []
68
- }}"""
69
-
70
-
71
- async def extract_search_params(user_message: str) -> dict:
72
- """
73
- Extract search parameters from user message.
74
-
75
- Args:
76
- user_message: What user searched for
77
-
78
- Returns:
79
- Dict with search parameters
80
- """
81
-
82
- logger.info("Extracting search parameters", message_len=len(user_message))
83
-
84
- try:
85
- prompt = SEARCH_EXTRACTION_PROMPT.format(user_message=user_message)
86
-
87
- response = await llm.ainvoke([
88
- SystemMessage(content="Extract search parameters from user query. Return ONLY valid JSON."),
89
- HumanMessage(content=prompt)
90
- ])
91
-
92
- response_text = response.content if hasattr(response, 'content') else str(response)
93
-
94
- # ✅ Validate JSON
95
- validation = JSONValidator.extract_and_validate(response_text)
96
-
97
- if not validation.is_valid:
98
- logger.warning("Search parameter validation failed")
99
- return {}
100
-
101
- logger.info("Search parameters extracted", keys=list(validation.data.keys()))
102
- return validation.data
103
-
104
- except Exception as e:
105
- logger.error("Search extraction error", exc_info=e)
106
- return {}
107
 
108
 
109
- async def search_listings(search_params: dict) -> list:
110
  """
111
- Query MongoDB for listings matching search criteria.
112
-
113
- Args:
114
- search_params: Dict with location, price, bedrooms, etc.
115
-
116
- Returns:
117
- List of matching listings
118
  """
119
-
120
- logger.info("Searching listings", params_keys=list(search_params.keys()))
121
-
122
  try:
123
  db = await get_db()
124
 
125
  # Build MongoDB query
126
  query = {"status": "active"}
127
 
128
- # Location filter
129
- if search_params.get("location"):
130
- # Case-insensitive location search
131
- location = search_params["location"]
132
- query["location"] = {"$regex": location, "$options": "i"}
133
 
134
- # Price filters
135
- if search_params.get("min_price"):
136
- query["price"] = {"$gte": search_params["min_price"]}
137
- if search_params.get("max_price"):
138
  if "price" in query:
139
- query["price"]["$lte"] = search_params["max_price"]
140
  else:
141
- query["price"] = {"$lte": search_params["max_price"]}
142
-
143
- # Bedrooms
144
- if search_params.get("bedrooms"):
145
- query["bedrooms"] = {"$gte": search_params["bedrooms"]}
146
-
147
- # Bathrooms
148
- if search_params.get("bathrooms"):
149
- query["bathrooms"] = {"$gte": search_params["bathrooms"]}
150
 
151
- # Listing type
152
- if search_params.get("listing_type"):
153
- query["listing_type"] = search_params["listing_type"].lower()
154
 
155
- # Amenities
156
- if search_params.get("amenities"):
157
- amenities = [a.lower() for a in search_params["amenities"]]
158
- query["amenities"] = {"$in": amenities}
159
-
160
- logger.info("MongoDB query built", query=query)
 
 
 
 
 
 
161
 
162
- # Execute query with limit
163
- results = await db.listings.find(query).limit(10).to_list(10)
164
 
165
- # Convert ObjectId to string to prevent serialization errors
166
  for item in results:
167
  if "_id" in item:
168
  item["_id"] = str(item["_id"])
169
-
170
- logger.info("Search completed", results_count=len(results))
171
-
172
  return results
173
 
174
  except Exception as e:
175
- logger.error("Listing search error", exc_info=e)
176
  return []
177
 
178
 
179
- SEARCH_RESULTS_PROMPT = """You are presenting property search results to a user.
180
-
181
- CRITICAL LANGUAGE RULE:
182
- The user's query is: "{user_query}"
183
- - If the query is in ENGLISH, respond in ENGLISH
184
- - If the query is in FRENCH, respond in FRENCH
185
-
186
- USER INFO:
187
- - Name: {user_name}
188
- - Query: "{user_query}"
189
- - Mode: {search_mode}
190
-
191
- SEARCH RESULTS ({count} properties found):
192
- {listings_summary}
193
-
194
- CURRENCY: {currency}
195
-
196
- YOUR TASK - KEEP IT SHORT:
197
-
198
- 1. If search_mode is "strict" or "broad":
199
- - These are EXACT MATCHES for what the user asked.
200
- - Start with: "Here are {count} properties in [location]! 🏠" (or similar short intro)
201
- - DO NOT say "suggestions" or "alternatives" - these ARE what they asked for.
202
-
203
- 2. If search_mode is "relaxed":
204
- - You couldn't find exact matches, so these are alternatives.
205
- - Say: "I couldn't find exactly what you're looking for, but you might like these:"
206
-
207
- 3. FORMAT EACH PROPERTY (in the USER'S LANGUAGE):
208
- - Show: "1. [Title] - [Price] 💰"
209
- - Add ONE short sentence describing it IN THE USER'S LANGUAGE (even if title is different language)
210
- - Example English: "1. Villa de Prestige - 350,000 XOF/month 💰 → A luxurious 4-bedroom villa with pool"
211
- - Example French: "1. 3-Bed Rent in Cotonou - 200,000 XOF/mois 💰 → Appartement 3 chambres bien situé"
212
-
213
- 4. Keep responses concise - users see full details on cards below.
214
-
215
- Write ONLY the response text."""
216
-
217
-
218
- async def generate_search_results_text(
219
- listings: list,
220
- search_params: dict,
221
- user_query: str,
222
- user_name: str = None,
223
- inferred_currency: str = None,
224
- search_mode: str = "strict"
225
- ) -> str:
226
  """
227
- Use LLM to generate personalized, multilingual search results text.
 
228
  """
229
 
230
- count = len(listings)
231
-
232
- # Build listings summary for LLM
233
- if listings:
234
- listings_summary = ""
235
- for i, listing in enumerate(listings, 1):
236
- title = listing.get("title", "Untitled")
237
- loc = listing.get("location", "Unknown")
238
- price = float(listing.get("price", 0) or 0)
239
- currency = listing.get("currency", inferred_currency or "XOF")
240
- price_type = listing.get("price_type", "monthly")
241
- bedrooms = listing.get("bedrooms", "?")
242
- description = str(listing.get("description", ""))[:100]
243
- relevance = listing.get("_relevance_score", 0)
244
-
245
- listings_summary += f"""
246
- Property {i}:
247
- - Title: {title}
248
- - Location: {loc}
249
- - Price: {currency} {price:,.0f} {price_type}
250
- - Bedrooms: {bedrooms}
251
- - Description: {description}...
252
- - Match Score: {relevance:.2f}
253
- """
254
- else:
255
- listings_summary = "No properties found."
256
-
257
- # Format prompt
258
- prompt = SEARCH_RESULTS_PROMPT.format(
259
- user_name=user_name or "there",
260
- user_query=user_query,
261
- count=count,
262
- listings_summary=listings_summary,
263
- currency=inferred_currency or "local currency",
264
- search_mode=search_mode
265
- )
266
 
267
  try:
268
- messages = [
269
- SystemMessage(content="You are AIDA, a friendly and helpful real estate AI assistant. You help users find 'closest matches' when exact ones aren't available."),
270
- HumanMessage(content=prompt)
271
- ]
272
 
273
- response = await llm.ainvoke(messages)
274
- return response.content.strip()
275
-
276
- except Exception as e:
277
- logger.error("LLM search text generation failed", error=str(e))
278
- return f"I found {count} properties that might interest you! Take a look below."
279
-
 
 
 
 
 
 
280
 
281
- async def search_query_handler(state: AgentState) -> AgentState:
282
- """
283
- Handle search flow with Two-Step Hybrid Search (Strict -> Relaxed).
284
- """
285
-
286
- logger.info("Handling search query", user_id=state.user_id)
287
-
288
- try:
289
- # STEP 1: Extract search parameters
290
- search_params = await extract_search_params(state.last_user_message)
291
 
292
  if not search_params:
293
  state.temp_data["response_text"] = "I couldn't quite understand your search. Could you try rephrasing it?"
294
  state.temp_data["action"] = "search_invalid"
295
  return state
296
-
297
- # Helper: Check if query has location
298
- def has_location_filter(params: dict) -> bool:
299
- return bool(params.get("location"))
300
-
301
- # STEP 2: PRIMARY SEARCH - Always use MongoDB for location searches (exact match)
302
- # This ensures users get exact location results, not semantic "similar" results
303
- if has_location_filter(search_params):
304
- logger.info("Location search: Using MongoDB for exact match", location=search_params.get("location"))
305
- results = await search_listings(search_params)
306
- inferred_currency = await infer_currency_from_location(search_params.get("location"))
307
- search_mode = "strict"
308
 
309
- # STEP 3: If no exact matches, offer suggestions via Qdrant semantic search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  if not results:
311
- logger.info("No exact matches found, trying Qdrant for suggestions...")
312
- results, _ = await search_listings_hybrid(
313
- user_query=state.last_user_message,
314
  search_params=search_params,
 
315
  mode="relaxed"
316
  )
317
- if results:
318
- search_mode = "relaxed"
319
- logger.info("Found semantic suggestions", count=len(results))
320
- else:
321
- # No location specified - use semantic search for general queries
322
- logger.info("General query: Using Qdrant semantic search")
323
- results, inferred_currency = await search_listings_hybrid(
324
- user_query=state.last_user_message,
325
  search_params=search_params,
326
- mode="strict"
 
327
  )
328
- search_mode = "strict"
329
 
330
- if not results:
331
- results, _ = await search_listings_hybrid(
332
- user_query=state.last_user_message,
333
- search_params=search_params,
334
- mode="relaxed"
335
- )
336
- if results:
337
- search_mode = "relaxed"
338
-
339
- # STEP 5: Generate LLM Response
340
- formatted_results = await generate_search_results_text(
341
- listings=results,
342
- search_params=search_params,
343
- user_query=state.last_user_message,
344
- user_name=state.user_name,
345
- inferred_currency=inferred_currency,
346
- search_mode=search_mode
 
 
 
 
347
  )
348
 
349
- # STEP 6: Finalize state
 
 
 
 
350
  state.search_results = results
351
- state.temp_data["response_text"] = formatted_results
352
  state.temp_data["action"] = "search_results"
353
- state.temp_data["inferred_currency"] = inferred_currency
 
354
 
355
- # Offer notification if no exact matches were found (even if suggestions were shown)
356
- if search_mode == "relaxed" or not results:
357
- state.temp_data["response_text"] += "\n\nWould you like me to notify you if an exact match for your request becomes available? Just say \"notify me\"!"
358
-
359
  state.transition_to(FlowState.SEARCH_RESULTS)
360
  state.transition_to(FlowState.IDLE)
361
 
362
  return state
363
-
364
  except Exception as e:
365
- logger.error("Search flow failed", exc_info=e)
366
  state.set_error(str(e))
367
  return state
 
1
  # app/ai/agent/nodes/search_query.py
2
  """
3
  Node: Process search queries and return matching listings.
4
+ INTELLIGENT HYBRID SEARCH: Uses strategy selector + hybrid/vector search + natural responses.
5
  """
6
 
 
 
7
  from structlog import get_logger
 
 
8
 
9
  from app.ai.agent.state import AgentState, FlowState
 
10
  from app.database import get_db
 
 
11
 
12
+ # Import shared intelligent services
13
+ from app.ai.services.search_intent_classifier import classify_search_intent
14
+ from app.ai.services.search_extractor import extract_search_params
15
+ from app.ai.services.search_strategy_selector import select_search_strategy, SearchStrategy
16
+ from app.ai.services.search_responder import generate_natural_response, generate_non_search_response
17
+ from app.ai.services.search_service import (
18
+ search_listings_hybrid,
19
+ search_mongo_then_qdrant,
20
+ search_qdrant_then_mongo,
21
+ infer_currency_from_location
22
  )
23
 
24
+ logger = get_logger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
 
27
async def search_mongodb(params: dict, limit: int = 10) -> list:
    """
    Search MongoDB for active listings (used for the MONGO_ONLY strategy).

    Text criteria (location, listing type, amenities) are matched as
    case-insensitive *literal* substrings. Every such value is escaped with
    ``re.escape`` before it is placed in a ``$regex`` clause, because the
    values originate from the user's message and must not be interpreted as
    regular-expression syntax.

    Args:
        params: Extracted search parameters. Keys read here: ``location``,
            ``min_price``, ``max_price``, ``bedrooms``, ``bathrooms``,
            ``listing_type`` and ``amenities``. Falsy values (``None``, ``0``,
            ``""``, ``[]``) mean "no filter".
        limit: Maximum number of documents to return.

    Returns:
        Listing documents with ``_id`` converted to ``str``. An empty list is
        returned when nothing matches or when the lookup fails (the failure
        is logged, not raised).
    """
    import re  # local import: the module-level import block is left untouched

    def literal_ci(value) -> dict:
        # Case-insensitive substring match on the literal text of ``value``.
        return {"$regex": re.escape(str(value)), "$options": "i"}

    try:
        db = await get_db()

        # Only active listings are ever returned.
        query = {"status": "active"}

        if params.get("location"):
            query["location"] = literal_ci(params["location"])

        # Build the price range once so min/max never overwrite each other.
        price_range = {}
        if params.get("min_price"):
            price_range["$gte"] = params["min_price"]
        if params.get("max_price"):
            price_range["$lte"] = params["max_price"]
        if price_range:
            query["price"] = price_range

        # Bedrooms/bathrooms are treated as minimums.
        if params.get("bedrooms"):
            query["bedrooms"] = {"$gte": params["bedrooms"]}

        if params.get("bathrooms"):
            query["bathrooms"] = {"$gte": params["bathrooms"]}

        if params.get("listing_type"):
            query["listing_type"] = literal_ci(params["listing_type"])

        amenities = params.get("amenities")
        if amenities:
            # A bare string would otherwise be iterated character by character.
            if isinstance(amenities, str):
                amenities = [amenities]
            # Every requested amenity must be present on the listing.
            query["$and"] = [{"amenities": literal_ci(a)} for a in amenities]

        logger.info("Agent MongoDB query", query=query)

        results = await db.listings.find(query).limit(limit).to_list(limit)

        # ObjectId is not JSON-serializable; expose it as a plain string.
        for item in results:
            if "_id" in item:
                item["_id"] = str(item["_id"])

        return results

    except Exception as e:
        # Deliberate best-effort: a failed lookup degrades to "no results".
        logger.error("Agent MongoDB search error", exc_info=e)
        return []
80
 
81
 
82
async def search_query_handler(state: AgentState) -> AgentState:
    """
    Handle the search flow with intelligent strategy selection.

    Steps: classify intent, extract parameters, select a strategy, execute
    it, generate the natural-language reply, then store everything on
    ``state``.

    Args:
        state: Current agent state. Reads ``last_user_message``; writes
            ``search_results`` and the ``temp_data`` keys ``response_text``,
            ``action``, ``inferred_currency`` and ``strategy_used``.

    Returns:
        The same ``state`` object. Unexpected exceptions are logged and
        recorded through ``state.set_error`` instead of being raised.
    """

    user_query = state.last_user_message
    logger.info("Agent handling search query", user_query=user_query)

    try:
        # STEP 1: Classify intent
        intent_result = await classify_search_intent(user_query)

        if not intent_result.get("is_search", False):
            # Upstream routing normally filters these out; answer gracefully
            # if one still reaches this node.
            logger.info("Agent: Non-search input detected inside search node")

            # Extraction is run here only to learn the user's language.
            params = await extract_search_params(user_query)
            user_lang = params.get("user_language", "en")

            response_text = await generate_non_search_response(user_query, user_lang)
            state.temp_data["response_text"] = response_text
            state.temp_data["action"] = "search_invalid"
            return state

        # STEP 2: Extract params
        search_params = await extract_search_params(user_query)

        # On failure the extractor returns a dict holding only
        # "user_language", which is truthy. Treat "no key besides the
        # language" as a failed extraction so this guard can actually fire.
        if not search_params or not (set(search_params) - {"user_language"}):
            state.temp_data["response_text"] = "I couldn't quite understand your search. Could you try rephrasing it?"
            state.temp_data["action"] = "search_invalid"
            return state

        # STEP 3: Select Strategy
        strategy_result = await select_search_strategy(user_query, search_params)
        strategy = strategy_result.get("strategy", SearchStrategy.MONGO_ONLY)

        logger.info(f"Agent Strategy: {strategy} - {strategy_result.get('reasoning')}")

        # STEP 4: Execute Strategy
        results = []
        limit = 10
        currency = "XOF"  # default when nothing better is inferred

        if strategy == SearchStrategy.MONGO_ONLY:
            results = await search_mongodb(search_params, limit)

            if search_params.get("location"):
                # NOTE(review): unpacks a 2-tuple; the previous version of this
                # node used the return value as a single currency string.
                # Confirm against infer_currency_from_location's signature.
                currency, _ = await infer_currency_from_location(search_params["location"])

            if not results:
                logger.info("Agent: MONGO_ONLY yielded 0 results, attempting fallback")
                results, fallback_currency = await search_listings_hybrid(
                    user_query=user_query,
                    search_params=search_params,
                    limit=limit,
                    mode="relaxed"
                )
                # Keep the currency already inferred from the location unless
                # the fallback search supplies one of its own.
                currency = fallback_currency or currency
                strategy = "QDRANT_FALLBACK"

        elif strategy == SearchStrategy.QDRANT_ONLY:
            results, currency = await search_listings_hybrid(
                user_query=user_query,
                search_params=search_params,
                limit=limit,
                mode="relaxed"
            )

        elif strategy == SearchStrategy.MONGO_THEN_QDRANT:
            results, currency = await search_mongo_then_qdrant(
                user_query=user_query,
                search_params=search_params,
                limit=limit
            )

        elif strategy == SearchStrategy.QDRANT_THEN_MONGO:
            results, currency = await search_qdrant_then_mongo(
                user_query=user_query,
                search_params=search_params,
                limit=limit
            )
        # Any other strategy value falls through with an empty result list.

        # STEP 5: Generate Natural Response
        response_text = await generate_natural_response(
            user_query=user_query,
            count=len(results),
            params=search_params,
            listings=results,
            strategy_used=strategy
        )

        # Offer a notification only when nothing at all could be shown.
        if not results:
            response_text += "\n\nWould you like me to notify you if something matches this later? Just say 'notify me'!"

        # STEP 6: Finalize State
        state.search_results = results
        state.temp_data["response_text"] = response_text
        state.temp_data["action"] = "search_results"
        state.temp_data["inferred_currency"] = currency
        state.temp_data["strategy_used"] = strategy

        # The flow passes through SEARCH_RESULTS and settles back in IDLE.
        state.transition_to(FlowState.SEARCH_RESULTS)
        state.transition_to(FlowState.IDLE)

        return state

    except Exception as e:
        logger.error("Agent search flow failed", exc_info=e)
        state.set_error(str(e))
        return state
app/ai/services/search_extractor.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/ai/services/search_extractor.py
2
+ """
3
+ Search Parameter Extractor - Shared service for extracting search criteria from natural language.
4
+ Used by both the REST API and the AI Agent.
5
+ """
6
+
7
+ import logging
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_core.messages import SystemMessage, HumanMessage
10
+ from app.config import settings
11
+ from app.ai.agent.validators import JSONValidator
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # LLM for extraction
16
+ llm = ChatOpenAI(
17
+ api_key=settings.DEEPSEEK_API_KEY,
18
+ base_url=settings.DEEPSEEK_BASE_URL,
19
+ model="deepseek-chat",
20
+ temperature=0.1, # Low temperature for precision
21
+ )
22
+
23
+ EXTRACTION_PROMPT = """You are a multilingual search assistant. Extract search criteria and output in ENGLISH.
24
+
25
+ User's query: "{query}"
26
+
27
+ TASK:
28
+ 1. Understand the query in ANY language
29
+ 2. FIX ALL TYPOS (especially city names)
30
+ 3. Translate values to ENGLISH
31
+ 4. Detect user's language
32
+
33
+ LOCATION TYPO FIXES (IMPORTANT):
34
+ - "clalavi"/"callavi"/"clavai" → "Calavi"
35
+ - "cotonoo"/"cotonu"/"kotonoo" → "Cotonou"
36
+ - "lagoss"/"laogs"/"lagos" → "Lagos"
37
+ - "porto novo"/"portonovo" → "Porto-Novo"
38
+ - "abujaa"/"abja" → "Abuja"
39
+
40
+ LISTING TYPE (translate to English):
41
+ - "en vente"/"à vendre"/"for sale" → "sale"
42
+ - "à louer"/"for rent"/"location" → "rent"
43
+ - "courte durée"/"short stay" → "short-stay"
44
+ - "colocataire"/"roommate" → "roommate"
45
+
46
+ PRICE PARSING:
47
+ - "20k"/"20000" → 20000
48
+ - "house of 20k" → min_price: 18000, max_price: 22000
49
+ - "under 50k" → max_price: 50000
50
+ - "above 100k" → min_price: 100000
51
+
52
+ AMENITIES - Extract mentioned amenities:
53
+ - "balcony", "pool", "swimming pool", "parking", "wifi", "gym", "security"
54
+ - "furnished", "air conditioning", "garden", "elevator", "kitchen"
55
+ - Also detect contextual features like "close to school", "near beach", "quiet neighborhood"
56
+
57
+ Return ONLY valid JSON (fields null if not found):
58
+ {{
59
+ "location": string or null,
60
+ "min_price": number or null,
61
+ "max_price": number or null,
62
+ "bedrooms": number or null,
63
+ "bathrooms": number or null,
64
+ "listing_type": "rent" | "sale" | "short-stay" | "roommate" | null,
65
+ "amenities": [],
66
+ "contextual_features": [],
67
+ "user_language": "en" | "fr" | "es" | "pt" | etc.
68
+ }}"""
69
+
70
+
71
async def extract_search_params(query: str) -> dict:
    """
    Extract and normalize search parameters from a natural language query.

    The query is sent to the LLM with ``EXTRACTION_PROMPT`` and the reply is
    parsed through ``JSONValidator``. Amenities are normalized to stripped,
    lowercase strings; entries that are not strings are dropped rather than
    aborting the whole extraction.

    Args:
        query: User's natural language query.

    Returns:
        Dict of normalized parameters as produced by the LLM. When the reply
        cannot be validated, is not a JSON object, or any error occurs, the
        sentinel ``{"user_language": "en"}`` is returned instead.
    """
    try:
        prompt = EXTRACTION_PROMPT.format(query=query)

        response = await llm.ainvoke([
            SystemMessage(content="Extract search params. Fix typos. Translate to English. JSON only."),
            HumanMessage(content=prompt)
        ])

        response_text = response.content if hasattr(response, 'content') else str(response)
        validation = JSONValidator.extract_and_validate(response_text)

        if not validation.is_valid:
            logger.warning(f"Parameter extraction validation failed for: {query}")
            return {"user_language": "en"}

        data = validation.data
        if not isinstance(data, dict):
            # Valid JSON that is not an object (e.g. a list) carries no params.
            logger.warning(f"Parameter extraction validation failed for: {query}")
            return {"user_language": "en"}

        # Clean up amenities: lowercase, trimmed, strings only. A single
        # malformed entry must not discard every other extracted field.
        amenities = data.get("amenities")
        if amenities:
            if isinstance(amenities, str):
                amenities = [amenities]
            data["amenities"] = [
                a.strip().lower()
                for a in amenities
                if isinstance(a, str) and a.strip()
            ]

        logger.info(f"Extracted params: {data}")
        return data

    except Exception as e:
        # Best-effort: log with traceback and fall back to the sentinel.
        logger.exception("Extraction error: %s", e)
        return {"user_language": "en"}
app/ai/services/search_intent_classifier.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/ai/services/search_intent_classifier.py
2
+ """
3
+ Search Intent Classifier - Detects if user input is a property search query or casual conversation.
4
+
5
+ This prevents the search endpoint from trying to search for greetings, thank yous, or general questions.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict
10
+ from langchain_openai import ChatOpenAI
11
+ from langchain_core.messages import SystemMessage, HumanMessage
12
+ from app.config import settings
13
+ from app.ai.agent.validators import JSONValidator
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # LLM for intent classification
18
+ llm = ChatOpenAI(
19
+ api_key=settings.DEEPSEEK_API_KEY,
20
+ base_url=settings.DEEPSEEK_BASE_URL,
21
+ model="deepseek-chat",
22
+ temperature=0.1, # Low temperature for consistent classification
23
+ )
24
+
25
+
26
+ INTENT_CLASSIFICATION_PROMPT = """You are a search intent classifier for a real estate platform.
27
+
28
+ User input: "{user_input}"
29
+
30
+ TASK: Determine if this input is a PROPERTY SEARCH QUERY or NOT.
31
+
32
+ PROPERTY SEARCH indicators:
33
+ - Mentions location (Cotonou, Lagos, Calavi, etc.)
34
+ - Mentions property features (bedrooms, bathrooms, balcony, pool, etc.)
35
+ - Mentions price or budget
36
+ - Mentions property type (house, apartment, room, etc.)
37
+ - Mentions listing type (rent, sale, short stay)
38
+ - Uses search verbs (show me, find, search, looking for, need, want)
39
+ - General property searches (houses, apartments, properties)
40
+
41
+ NOT property search:
42
+ - Greetings (hello, hi, hey, bonjour, salut)
43
+ - Thanks/appreciation (thank you, thanks, merci)
44
+ - Casual questions (how are you, what can you do, how does this work)
45
+ - Confirmations (yes, no, ok, sure)
46
+ - General chat (talking about weather, life, etc.)
47
+ - Commands unrelated to search (help, contact, support)
48
+
49
+ Examples:
50
+ ✅ "show me 3 bedroom apartments in Cotonou" → SEARCH
51
+ ✅ "houses with pool" → SEARCH
52
+ ✅ "apartment under 200k" → SEARCH
53
+ ✅ "properties in Lagos" → SEARCH
54
+ ✅ "je cherche maison à Calavi" → SEARCH
55
+ ✅ "cozy place near beach" → SEARCH
56
+ ❌ "hello there" → NOT SEARCH
57
+ ❌ "thank you so much" → NOT SEARCH
58
+ ❌ "how are you?" → NOT SEARCH
59
+ ❌ "yes" → NOT SEARCH
60
+ ❌ "what can you do?" → NOT SEARCH
61
+
62
+ Return ONLY valid JSON:
63
+ {{
64
+ "is_search": true or false,
65
+ "confidence": 0.0 to 1.0,
66
+ "reason": "brief explanation"
67
+ }}"""
68
+
69
+
70
async def classify_search_intent(user_input: str) -> Dict:
    """
    Classify whether user input is a property search query.

    Obvious non-search inputs (very short text, greetings, thanks,
    confirmations) are recognized locally without calling the LLM.
    Surrounding whitespace and trailing/leading punctuation are ignored for
    that check, so "Hello!" and "thanks!!" take the fast path too. Everything
    else is classified by the LLM.

    Args:
        user_input: User's message. ``None`` is treated as empty input.

    Returns:
        Dict with:
        - is_search: bool
        - confidence: float (0.0 - 1.0)
        - reason: str
        When the LLM reply is invalid, lacks a usable ``is_search`` value, or
        the call fails, the result defaults to ``is_search=True`` with
        confidence 0.5.
    """

    def default_to_search(reason: str) -> Dict:
        # Better to run a search that finds nothing than to refuse a real one.
        return {"is_search": True, "confidence": 0.5, "reason": reason}

    # Normalize for the quick checks: case, whitespace and edge punctuation.
    user_lower = (user_input or "").lower().strip(" \t\r\n\f\v!?.,;:")

    # Very short inputs are usually not searches
    if len(user_lower) < 3:
        return {
            "is_search": False,
            "confidence": 0.95,
            "reason": "Input too short to be a search query"
        }

    # Common greetings
    greetings = {"hi", "hello", "hey", "bonjour", "salut", "good morning", "good evening"}
    if user_lower in greetings:
        return {
            "is_search": False,
            "confidence": 1.0,
            "reason": "Common greeting detected"
        }

    # Common thanks
    thanks = {"thanks", "thank you", "merci", "thx", "ty"}
    if user_lower in thanks:
        return {
            "is_search": False,
            "confidence": 1.0,
            "reason": "Thank you message detected"
        }

    # Common confirmations
    confirmations = {"yes", "no", "ok", "okay", "sure", "oui", "non"}
    if user_lower in confirmations:
        return {
            "is_search": False,
            "confidence": 0.9,
            "reason": "Confirmation word detected"
        }

    # Use LLM for complex cases
    try:
        prompt = INTENT_CLASSIFICATION_PROMPT.format(user_input=user_input)

        response = await llm.ainvoke([
            SystemMessage(content="Classify if input is a property search. Return JSON only."),
            HumanMessage(content=prompt)
        ])

        response_text = response.content if hasattr(response, 'content') else str(response)
        validation = JSONValidator.extract_and_validate(response_text)

        if not validation.is_valid:
            logger.warning(f"Intent classification validation failed for: {user_input}")
            return default_to_search("Classification uncertain, defaulting to search")

        result = validation.data

        # A reply without a usable verdict is "uncertain", not "not a search".
        if not isinstance(result, dict) or result.get("is_search") is None:
            logger.warning(f"Intent classification validation failed for: {user_input}")
            return default_to_search("Classification uncertain, defaulting to search")

        # Coerce to a real bool; bool("false") would otherwise be True.
        verdict = result["is_search"]
        if isinstance(verdict, str):
            verdict = verdict.strip().lower() in {"true", "yes", "1"}
        result["is_search"] = bool(verdict)

        logger.info(f"Intent classified: is_search={result.get('is_search')}, confidence={result.get('confidence')}")
        return result

    except Exception as e:
        logger.error(f"Intent classification error: {e}")
        # Default to search on error (better to show no results than refuse valid searches)
        return default_to_search("Classification failed, defaulting to search")
app/ai/services/search_responder.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/ai/services/search_responder.py
2
+ """
3
+ Search Responder - Shared service for generating natural, multilingual, and enthusiastic search responses.
4
+ Used by both the REST API and the AI Agent.
5
+ """
6
+
7
+ import logging
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_core.messages import SystemMessage, HumanMessage
10
+ from app.config import settings
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # LLM for response generation
15
+ llm = ChatOpenAI(
16
+ api_key=settings.DEEPSEEK_API_KEY,
17
+ base_url=settings.DEEPSEEK_BASE_URL,
18
+ model="deepseek-chat",
19
+ temperature=0.3,
20
+ )
21
+
22
+ RESPONSE_GENERATION_PROMPT = """You are AIDA, a friendly real estate search assistant.
23
+
24
+ User's query: "{user_query}"
25
+ User's language: {user_language}
26
+ Found: {count} properties
27
+ Strategy used: {strategy}
28
+
29
+ Properties summary:
30
+ {properties_summary}
31
+
32
+ YOUR TASK - Generate a NATURAL, CONVERSATIONAL response:
33
+
34
+ 1. **Language**: Respond in {user_language} (the user's language)
35
+
36
+ 2. **Tone**: Friendly, enthusiastic, helpful (like a real estate agent excited to show properties)
37
+
38
+ 3. **Format**:
39
+ - If {count} > 0:
40
+ * Start with enthusiasm: "I found {count} amazing properties..." or "Great news! I found..."
41
+ * MENTION SPECIFIC FEATURES they asked for that were found (balcony, pool, close to school, etc.)
42
+ * Use emojis sparingly for friendliness: 🏠 🌟 ✨
43
+ * Example GOOD: "I found 3 fantastic properties with a balcony, swimming pool, and close to a school in Cotonou! Check them out 🏠"
44
+ * Example BAD: "Found 3 properties in your area"
45
+
46
+ - If {count} == 0:
47
+ * Be empathetic and helpful
48
+ * Suggest trying different criteria
49
+ * Example: "I couldn't find any properties matching those exact criteria. Try adjusting your budget or location? 🔍"
50
+
51
+ 4. **Keep it concise**: 1-2 sentences max. The user will see full details in the property cards below.
52
+
53
+ 5. **NO generic responses**: Always mention specific features when available
54
+
55
+ Examples:
56
+ ✅ GOOD (English): "I found 5 beautiful apartments with balconies and parking in Cotonou! Prices range from 150k to 300k 🏠"
57
+ ✅ GOOD (French): "J'ai trouvé 3 maisons magnifiques avec piscine et proche d'une école à Calavi ! Parfait pour votre famille 🌟"
58
+ ❌ BAD: "Found 3 properties in your area"
59
+ ❌ BAD: "Here are your search results"
60
+
61
+ Write ONLY the response message (no quotes, no extra formatting)."""
62
+
63
+
64
def _listing_field(listing, key, default):
    """Read ``key`` from a listing that is either a dict or an attribute-style object.

    A value of ``None`` is treated the same as a missing key so that callers
    always get something they can format.
    """
    if isinstance(listing, dict):
        value = listing.get(key, default)
    else:
        value = getattr(listing, key, default)
    return default if value is None else value


def _summarize_listings(listings, max_items=5, max_amenities=3):
    """Build the numbered, one-line-per-listing summary injected into the LLM prompt."""
    lines = []
    for index, listing in enumerate(listings[:max_items], 1):
        title = _listing_field(listing, "title", "Property")
        location = _listing_field(listing, "location", "Unknown")
        price = _listing_field(listing, "price", 0)
        currency = _listing_field(listing, "currency", "XOF")
        bedrooms = _listing_field(listing, "bedrooms", "?")
        amenities = _listing_field(listing, "amenities", [])

        # Prices may arrive as strings (or other non-numeric values) from the
        # search backends; never let formatting abort the whole response.
        try:
            price_text = f"{float(price):,.0f}"
        except (TypeError, ValueError):
            price_text = str(price)

        line = f"\n{index}. {title} in {location} - {currency} {price_text} - {bedrooms} beds"
        if amenities and isinstance(amenities, (list, tuple)):
            shown = ", ".join(str(item) for item in amenities[:max_amenities])
            line += f" - Amenities: {shown}"
        lines.append(line)
    return "".join(lines)


async def generate_natural_response(
    user_query: str,
    count: int,
    params: dict,
    listings: list,
    strategy_used: str = "SEARCH"
) -> str:
    """
    Generate a natural, conversational search-result message via the LLM.

    Args:
        user_query: Original user text
        count: Number of results found
        params: Extracted search parameters; ``user_language`` is read from it
            (defaults to "en")
        listings: Found listings (dicts or attribute-style objects); only the
            first five are summarised for the prompt
        strategy_used: Name of the search strategy, passed to the prompt as context

    Returns:
        The LLM message with surrounding quotes removed, or a fixed English
        fallback sentence if anything fails.
    """
    try:
        user_lang = (params or {}).get("user_language", "en")

        if listings:
            properties_summary = _summarize_listings(listings)
        else:
            properties_summary = "No properties found"

        prompt = RESPONSE_GENERATION_PROMPT.format(
            user_query=user_query,
            user_language=user_lang,
            count=count,
            strategy=strategy_used,
            properties_summary=properties_summary
        )

        response = await llm.ainvoke([
            SystemMessage(content=f"Generate natural, enthusiastic search results message in {user_lang}. Mention specific features."),
            HumanMessage(content=prompt)
        ])

        response_text = response.content if hasattr(response, 'content') else str(response)
        message = str(response_text).strip()

        # Remove one layer of wrapping quotes of each kind if the LLM added them
        for quote in ('"', "'"):
            if len(message) >= 2 and message.startswith(quote) and message.endswith(quote):
                message = message[1:-1]

        return message

    except Exception as e:
        # Best-effort boundary: the user must always get some message back.
        logger.error(f"Response generation error: {e}")
        if count > 0:
            return f"Found {count} properties! Check them out below 🏠"
        else:
            return "No properties found. Try adjusting your search criteria 🔍"
138
+
139
+
140
async def generate_non_search_response(user_input: str, user_language: str = "en") -> str:
    """Return a canned, friendly reply for input that is not a property search.

    Args:
        user_input: Raw user text.
        user_language: Language code; "en" and "fr" have templates, anything
            else falls back to English.

    Returns:
        The greeting template when the input contains a greeting word, the
        "you're welcome" reply when it contains a thanks word, otherwise the
        generic "describe what you're looking for" template.
    """
    import re

    templates = {
        "en": [
            "Hey! I'm your property search assistant 🏠 Try asking me to find properties like 'Show me 3-bedroom apartments in Cotonou' 🔍",
            "Hi there! I help you search for properties. Try something like 'Find houses with a pool in Lagos' 🏊‍♂️",
            "Hello! I'm here to help you find your perfect property. Just describe what you're looking for! 🏡"
        ],
        "fr": [
            "Salut ! Je suis votre assistant de recherche immobilière 🏠 Essayez de me demander 'Trouve-moi des appartements 3 chambres à Cotonou' 🔍",
            "Bonjour ! Je vous aide à chercher des propriétés. Essayez quelque chose comme 'Maisons avec piscine à Lagos' 🏊‍♂️",
            "Coucou ! Je suis là pour vous aider à trouver votre propriété idéale. Décrivez simplement ce que vous cherchez ! 🏡"
        ]
    }
    thank_responses = {
        "en": "You're welcome! Let me know if you need to search for more properties! 😊",
        "fr": "De rien ! Faites-moi savoir si vous voulez chercher d'autres propriétés ! 😊"
    }

    # Pick template based on language
    lang_templates = templates.get(user_language, templates["en"])

    # Match on whole words, not substrings: a substring test makes "hi" fire on
    # "this"/"something" and "hey" fire on "they".
    words = set(re.findall(r"[^\W\d_]+", (user_input or "").lower()))

    if words & {"hello", "hi", "hey", "bonjour", "salut"}:
        return lang_templates[0]

    # "thank" is matched as a prefix so "thanks"/"thankful" still count.
    if words & {"merci", "thx"} or any(word.startswith("thank") for word in words):
        return thank_responses.get(user_language, thank_responses["en"])

    return lang_templates[2]
app/ai/services/search_service.py CHANGED
@@ -399,3 +399,242 @@ async def search_listings_hybrid(
399
  )
400
 
401
  return results, currency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  )
400
 
401
  return results, currency
402
+
403
+
404
+ # ============================================================
405
+ # ADVANCED HYBRID STRATEGIES
406
+ # ============================================================
407
+
408
def _build_mongo_prefilter(search_params: Dict[str, Any]) -> Dict[str, Any]:
    """Translate extracted search parameters into the MongoDB pre-filter query."""
    import re

    query: Dict[str, Any] = {"status": "active"}

    # Free-text values come from user input / the LLM: escape them so regex
    # metacharacters are matched literally instead of being interpreted.
    location = search_params.get("location")
    if location:
        query["location"] = {"$regex": re.escape(str(location)), "$options": "i"}

    price_range: Dict[str, Any] = {}
    if search_params.get("min_price"):
        price_range["$gte"] = search_params["min_price"]
    if search_params.get("max_price"):
        price_range["$lte"] = search_params["max_price"]
    if price_range:
        query["price"] = price_range

    if search_params.get("bedrooms"):
        query["bedrooms"] = {"$gte": search_params["bedrooms"]}

    if search_params.get("bathrooms"):
        query["bathrooms"] = {"$gte": search_params["bathrooms"]}

    listing_type = search_params.get("listing_type")
    if listing_type:
        query["listing_type"] = {"$regex": re.escape(str(listing_type)), "$options": "i"}

    return query


async def _currency_for_params(search_params: Dict[str, Any]) -> str:
    """Infer the currency from the requested location, defaulting to "XOF"."""
    if search_params.get("location"):
        currency, _ = await infer_currency_from_location(search_params["location"])
        return currency
    return "XOF"


async def search_mongo_then_qdrant(
    user_query: str,
    search_params: Dict[str, Any],
    limit: int = 10
) -> Tuple[List[Dict], str]:
    """
    Strategy: Filter by location/price in MongoDB first, then use Qdrant for semantic ranking.

    Use case: "house close to international school in Calavi"
    - Step 1: MongoDB filters for Calavi
    - Step 2: Qdrant semantic search within those results for "close to school"

    Args:
        user_query: Original user query for semantic understanding
        search_params: Extracted parameters (location, min_price, max_price,
            bedrooms, bathrooms, listing_type are read)
        limit: Max results to return

    Returns:
        Tuple of (results, inferred_currency). On any error, or when the Qdrant
        client is unavailable, returns ([], "XOF").
    """

    logger.info("MONGO_THEN_QDRANT strategy", location=search_params.get("location"))

    if not qdrant_client:
        logger.error("Qdrant client not available")
        return [], "XOF"

    from app.database import get_db

    try:
        db = await get_db()

        # Step 1: structured pre-filter in MongoDB
        mongo_query = _build_mongo_prefilter(search_params)
        logger.info("MongoDB pre-filter", query=mongo_query)

        # Fetch more results than needed so the semantic step has room to rank
        cursor = db.listings.find(mongo_query).limit(limit * 3)
        mongo_results = []
        async for doc in cursor:
            if "_id" in doc:
                doc["_id"] = str(doc["_id"])
            mongo_results.append(doc)

        logger.info(f"MongoDB returned {len(mongo_results)} candidates")

        currency = await _currency_for_params(search_params)

        if not mongo_results:
            return [], currency

        # Step 2: semantically rank the MongoDB candidates in Qdrant
        mongo_ids = [doc.get("_id") for doc in mongo_results]
        query_vector = await embed_query(user_query)

        if not query_vector:
            # If embedding fails, return MongoDB results as-is
            logger.warning("Embedding failed, returning MongoDB results")
            return mongo_results[:limit], currency

        from qdrant_client.models import Filter, FieldCondition, MatchAny

        # NOTE(review): assumes each Qdrant payload stores the stringified
        # MongoDB id under the "_id" key - confirm against the indexer.
        id_filter = Filter(
            must=[
                FieldCondition(
                    key="_id",
                    match=MatchAny(any=mongo_ids)
                )
            ]
        )

        qdrant_results = await qdrant_client.query_points(
            collection_name=COLLECTION_NAME,
            query=query_vector,
            query_filter=id_filter,
            limit=limit,
            with_payload=True
        )

        final_results = []
        for point in qdrant_results.points:
            listing = dict(point.payload or {})
            listing["_relevance_score"] = point.score
            listing["_search_strategy"] = "MONGO_THEN_QDRANT"
            final_results.append(listing)

        logger.info(f"Qdrant semantic ranking returned {len(final_results)} results")

        if not final_results:
            # The Qdrant query only restricts by id (no score threshold), so an
            # empty answer means the candidates are not indexed there, not that
            # they are irrelevant. Keep the listings MongoDB already matched.
            logger.warning("No Qdrant points for MongoDB candidates, returning MongoDB results")
            fallback = mongo_results[:limit]
            for doc in fallback:
                doc["_search_strategy"] = "MONGO_THEN_QDRANT"
            return fallback, currency

        return final_results, currency

    except Exception as e:
        logger.error(f"MONGO_THEN_QDRANT error: {e}")
        return [], "XOF"
539
+
540
+
541
def _to_number(value):
    """Return ``value`` as a float, or ``None`` when it is missing or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _passes_structured_filters(listing: Dict[str, Any], search_params: Dict[str, Any]) -> bool:
    """Check one Qdrant payload against the structured filters in ``search_params``.

    Payload fields that are missing, ``None`` or non-numeric fail any filter
    that needs them instead of raising.
    """
    location = search_params.get("location")
    if location:
        listing_location = str(listing.get("location") or "").lower()
        if str(location).lower() not in listing_location:
            return False

    # (param name, payload field, True when the param is a lower bound)
    numeric_rules = (
        ("min_price", "price", True),
        ("max_price", "price", False),
        ("bedrooms", "bedrooms", True),
        ("bathrooms", "bathrooms", True),
    )
    for param, field, is_lower_bound in numeric_rules:
        if not search_params.get(param):
            continue
        bound = _to_number(search_params[param])
        if bound is None:
            # Unusable filter value: skip it rather than fail the whole search
            continue
        actual = _to_number(listing.get(field))
        if actual is None:
            return False
        if is_lower_bound and actual < bound:
            return False
        if not is_lower_bound and actual > bound:
            return False

    listing_type = search_params.get("listing_type")
    if listing_type:
        actual_type = str(listing.get("listing_type") or "").lower()
        if str(listing_type).lower() not in actual_type:
            return False

    return True


async def search_qdrant_then_mongo(
    user_query: str,
    search_params: Dict[str, Any],
    limit: int = 10
) -> Tuple[List[Dict], str]:
    """
    Strategy: Semantic search first in Qdrant, then apply MongoDB-style filters.

    Use case: "modern luxurious apartment" (semantic primary, filters secondary)
    - Step 1: Qdrant semantic search for "modern luxurious"
    - Step 2: Apply the structured filters to those results in Python

    Args:
        user_query: Original user query for semantic search
        search_params: Extracted parameters for filtering
        limit: Max results to return

    Returns:
        Tuple of (results, inferred_currency). Results keep Qdrant's ranking
        order. On any error, embedding failure, or when the Qdrant client is
        unavailable, returns ([], "XOF").
    """

    logger.info("QDRANT_THEN_MONGO strategy")

    if not qdrant_client:
        logger.error("Qdrant client not available")
        return [], "XOF"

    try:
        # Step 1: Semantic search in Qdrant (no filters yet)
        query_vector = await embed_query(user_query)

        if not query_vector:
            logger.warning("Embedding failed in QDRANT_THEN_MONGO")
            return [], "XOF"

        qdrant_results = await qdrant_client.query_points(
            collection_name=COLLECTION_NAME,
            query=query_vector,
            limit=limit * 3,  # Over-fetch so filtering can still fill `limit`
            with_payload=True
        )

        points = qdrant_results.points
        logger.info(f"Qdrant returned {len(points)} semantic candidates")

        # Step 2: Apply structured filters in Python, preserving rank order
        filtered_results = []
        for point in points:
            listing = dict(point.payload or {})
            if not _passes_structured_filters(listing, search_params):
                continue

            # Same bookkeeping fields as the MONGO_THEN_QDRANT strategy
            listing["_relevance_score"] = point.score
            listing["_search_strategy"] = "QDRANT_THEN_MONGO"
            filtered_results.append(listing)

            if len(filtered_results) >= limit:
                break

        logger.info(f"After filtering: {len(filtered_results)} results")

        # Infer currency
        currency = "XOF"
        if search_params.get("location"):
            currency, _ = await infer_currency_from_location(search_params["location"])

        return filtered_results, currency

    except Exception as e:
        logger.error(f"QDRANT_THEN_MONGO error: {e}")
        return [], "XOF"
640
+
app/ai/services/search_strategy_selector.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/ai/services/search_strategy_selector.py
2
+ """
3
+ Search Strategy Selector - Intelligently chooses the optimal search strategy.
4
+
5
+ Strategies:
6
+ - MONGO_ONLY: Pure MongoDB filters (structured queries with clear filters)
7
+ - QDRANT_ONLY: Pure semantic search (vague/descriptive queries)
8
+ - MONGO_THEN_QDRANT: Filter by location/price in MongoDB, then semantic search within results
9
+ - QDRANT_THEN_MONGO: Semantic search first, then apply MongoDB filters
10
+ """
11
+
12
+ import logging
13
+ from typing import Dict, Literal
14
+ from enum import Enum
15
+ from langchain_openai import ChatOpenAI
16
+ from langchain_core.messages import SystemMessage, HumanMessage
17
+ from app.config import settings
18
+ from app.ai.agent.validators import JSONValidator
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class SearchStrategy(str, Enum):
    """Closed set of search strategies the selector can choose between.

    Members are also plain strings, so they compare equal to their names.
    """

    # Pure MongoDB filtering
    MONGO_ONLY = "MONGO_ONLY"
    # Pure semantic search
    QDRANT_ONLY = "QDRANT_ONLY"
    # MongoDB pre-filter, then semantic search within those results
    MONGO_THEN_QDRANT = "MONGO_THEN_QDRANT"
    # Semantic search first, then structured filters
    QDRANT_THEN_MONGO = "QDRANT_THEN_MONGO"
29
+
30
+
31
+ # LLM for strategy selection
32
+ llm = ChatOpenAI(
33
+ api_key=settings.DEEPSEEK_API_KEY,
34
+ base_url=settings.DEEPSEEK_BASE_URL,
35
+ model="deepseek-chat",
36
+ temperature=0.2,
37
+ )
38
+
39
+
40
+ STRATEGY_SELECTION_PROMPT = """You are a search strategy selector for a real estate platform.
41
+
42
+ User query: "{user_query}"
43
+
44
+ Extracted parameters:
45
+ {search_params}
46
+
47
+ AVAILABLE STRATEGIES:
48
+
49
+ 1. MONGO_ONLY - Fast MongoDB filtering
50
+ Use when: Query has clear structured filters (location, price, bedrooms, etc.) and NO semantic/contextual requirements
51
+ Example: "3 bedroom apartment in Cotonou under 200k"
52
+
53
+ 2. QDRANT_ONLY - Pure semantic search
54
+ Use when: Query is vague, descriptive, or contextual with NO specific location/price filters
55
+ Example: "cozy modern apartment with good vibes"
56
+
57
+ 3. MONGO_THEN_QDRANT - Filter first, then semantic search
58
+ Use when: Query has a specific location/price filter AND semantic requirements
59
+ Example: "house close to international school in Calavi" (MongoDB for Calavi, Qdrant for "close to school")
60
+ Example: "family-friendly apartment in Cotonou" (MongoDB for Cotonou, Qdrant for "family-friendly")
61
+
62
+ 4. QDRANT_THEN_MONGO - Semantic first, then filter
63
+ Use when: Primary focus is semantic/descriptive, but has minor filters
64
+ Example: "luxurious modern apartment" (Qdrant for "luxurious modern", then MongoDB can filter)
65
+
66
+ DECISION RULES:
67
+ - If location specified + semantic features (near school, quiet, family-friendly, etc.) → MONGO_THEN_QDRANT
68
+ - If only structured filters (location, price, beds, baths) → MONGO_ONLY
69
+ - If only semantic/descriptive (cozy, modern, good vibes, spacious) → QDRANT_ONLY
70
+ - If semantic is primary but has some filters → QDRANT_THEN_MONGO
71
+
72
+ Return ONLY valid JSON:
73
+ {{
74
+ "strategy": "MONGO_ONLY" | "QDRANT_ONLY" | "MONGO_THEN_QDRANT" | "QDRANT_THEN_MONGO",
75
+ "reasoning": "brief explanation of why this strategy was chosen",
76
+ "has_semantic_features": true or false,
77
+ "has_structured_filters": true or false
78
+ }}"""
79
+
80
+
81
def _mongo_only_fallback(reasoning: str) -> Dict:
    """Build the default MONGO_ONLY decision used whenever selection cannot complete."""
    return {
        "strategy": SearchStrategy.MONGO_ONLY,
        "reasoning": reasoning,
        "has_semantic_features": False,
        "has_structured_filters": True
    }


async def select_search_strategy(user_query: str, search_params: Dict) -> Dict:
    """
    Select optimal search strategy based on query and extracted parameters.

    Obvious cases are decided by local heuristics; everything else is sent to
    the LLM, whose answer is validated before being returned.

    Args:
        user_query: Original user query
        search_params: Extracted search parameters (may be empty or None)

    Returns:
        Dict with:
        - strategy: SearchStrategy enum value (always a valid member)
        - reasoning: str
        - has_semantic_features: bool
        - has_structured_filters: bool
    """
    search_params = search_params or {}

    # Quick heuristics for obvious cases
    has_location = bool(search_params.get("location"))
    has_price = bool(search_params.get("min_price") or search_params.get("max_price"))
    has_bedrooms = bool(search_params.get("bedrooms"))
    has_bathrooms = bool(search_params.get("bathrooms"))
    has_listing_type = bool(search_params.get("listing_type"))
    has_amenities = bool(search_params.get("amenities"))

    structured_count = sum([has_location, has_price, has_bedrooms, has_bathrooms, has_listing_type])

    # Detect semantic keywords in query
    semantic_keywords = [
        "close to", "near", "nearby", "walking distance",
        "quiet", "peaceful", "calm", "serene",
        "family-friendly", "family", "safe",
        "modern", "contemporary", "new", "renovated",
        "cozy", "comfortable", "warm",
        "luxurious", "luxury", "prestigious", "elegant",
        "spacious", "large", "big", "roomy",
        "bright", "sunny", "well-lit",
        "good vibes", "nice area", "good neighborhood",
        "beach", "school", "market", "downtown", "city center",
    ]

    query_lower = (user_query or "").lower()
    has_semantic = any(keyword in query_lower for keyword in semantic_keywords)

    # Simple rule-based decision for clear cases
    if structured_count >= 2 and not has_semantic and not has_amenities:
        # Pure structured query
        return {
            "strategy": SearchStrategy.MONGO_ONLY,
            "reasoning": "Query has multiple structured filters and no semantic features",
            "has_semantic_features": False,
            "has_structured_filters": True
        }

    if structured_count == 0 and (has_semantic or has_amenities):
        # Pure semantic query
        return {
            "strategy": SearchStrategy.QDRANT_ONLY,
            "reasoning": "Query is purely semantic/descriptive with no structured filters",
            "has_semantic_features": True,
            "has_structured_filters": False
        }

    if has_location and has_semantic:
        # Location + semantic features
        return {
            "strategy": SearchStrategy.MONGO_THEN_QDRANT,
            "reasoning": "Query has location filter and semantic features - filter by location first, then semantic search",
            "has_semantic_features": True,
            "has_structured_filters": True
        }

    # Use LLM for complex cases
    try:
        params_str = "\n".join([f"- {k}: {v}" for k, v in search_params.items() if v])
        prompt = STRATEGY_SELECTION_PROMPT.format(
            user_query=user_query,
            search_params=params_str if params_str else "No parameters extracted"
        )

        response = await llm.ainvoke([
            SystemMessage(content="Select optimal search strategy. Return JSON only."),
            HumanMessage(content=prompt)
        ])

        response_text = response.content if hasattr(response, 'content') else str(response)
        validation = JSONValidator.extract_and_validate(response_text)

        if not validation.is_valid:
            logger.warning(f"Strategy selection validation failed, defaulting to MONGO_ONLY")
            return _mongo_only_fallback("Strategy selection failed, using MongoDB filters")

        result = validation.data if isinstance(validation.data, dict) else {}

        # The LLM output is untrusted: the strategy may be missing, mis-cased or
        # not one of the four known values. Normalise to the enum or fall back.
        try:
            strategy = SearchStrategy(str(result.get("strategy", "")).strip().upper())
        except ValueError:
            logger.warning(f"Strategy selection validation failed, defaulting to MONGO_ONLY")
            return _mongo_only_fallback("Strategy selection failed, using MongoDB filters")

        selection = dict(result)
        selection["strategy"] = strategy
        selection["reasoning"] = str(result.get("reasoning", ""))
        # Fill the flags from the local heuristics when the LLM omitted them
        selection["has_semantic_features"] = bool(
            result.get("has_semantic_features", has_semantic or has_amenities)
        )
        selection["has_structured_filters"] = bool(
            result.get("has_structured_filters", structured_count > 0)
        )

        logger.info(f"Strategy selected: {strategy.value} - {selection['reasoning']}")
        return selection

    except Exception as e:
        logger.error(f"Strategy selection error: {e}")
        # Default to MONGO_ONLY on error
        return _mongo_only_fallback("Strategy selection error, defaulting to MongoDB")
app/routes/search.py CHANGED
@@ -1,15 +1,14 @@
1
  # ============================================================
2
- # app/routes/search.py - AIDA Reliable Hybrid Search
3
  # ============================================================
4
  """
5
- BULLETPROOF SEARCH:
6
- 1. User types in ANY language with ANY typos
7
- 2. AI normalizes to English
8
- 3. MongoDB tries first (fast, strict)
9
- 4. Qdrant fallback if no results (semantic, fuzzy)
10
- 5. Response in user's language
11
 
12
- Goal: If property exists, user WILL find it.
13
  """
14
 
15
  import logging
@@ -21,27 +20,21 @@ from app.database import get_db
21
  from app.models.listing import Listing
22
  from app.guards.jwt_guard import get_current_user
23
 
24
- # Import LLM
25
- from langchain_openai import ChatOpenAI
26
- from langchain_core.messages import SystemMessage, HumanMessage
27
- from app.config import settings
28
- from app.ai.agent.validators import JSONValidator
29
-
30
- # Import Qdrant hybrid search
31
- from app.ai.services.search_service import search_listings_hybrid
 
 
32
 
33
  router = APIRouter(tags=["AIDA Search"])
34
 
35
  logger = logging.getLogger(__name__)
36
 
37
- # LLM
38
- llm = ChatOpenAI(
39
- api_key=settings.DEEPSEEK_API_KEY,
40
- base_url=settings.DEEPSEEK_BASE_URL,
41
- model="deepseek-chat",
42
- temperature=0.2,
43
- )
44
-
45
 
46
  # ============================================================
47
  # SCHEMAS
@@ -58,76 +51,7 @@ class SearchResponseDto(BaseModel):
58
  data: List[dict]
59
  total: int
60
  search_params: Optional[dict] = None
61
-
62
-
63
- # ============================================================
64
- # EXTRACTION PROMPT
65
- # ============================================================
66
-
67
- EXTRACTION_PROMPT = """You are a multilingual search assistant. Extract search criteria and output in ENGLISH.
68
-
69
- User's query: "{query}"
70
-
71
- TASK:
72
- 1. Understand the query in ANY language
73
- 2. FIX ALL TYPOS (especially city names)
74
- 3. Translate values to ENGLISH
75
- 4. Detect user's language
76
-
77
- LOCATION TYPO FIXES (IMPORTANT):
78
- - "clalavi"/"callavi"/"clavai" → "Calavi"
79
- - "cotonoo"/"cotonu"/"kotonoo" → "Cotonou"
80
- - "lagoss"/"laogs"/"lagos" → "Lagos"
81
- - "porto novo"/"portonovo" → "Porto-Novo"
82
- - "abujaa"/"abja" → "Abuja"
83
-
84
- LISTING TYPE (translate to English):
85
- - "en vente"/"à vendre"/"for sale" → "sale"
86
- - "à louer"/"for rent"/"location" → "rent"
87
- - "courte durée"/"short stay" → "short-stay"
88
- - "colocataire"/"roommate" → "roommate"
89
-
90
- PRICE PARSING:
91
- - "20k"/"20000" → 20000
92
- - "house of 20k" → min_price: 18000, max_price: 22000
93
- - "under 50k" → max_price: 50000
94
- - "above 100k" → min_price: 100000
95
-
96
- Return ONLY valid JSON:
97
- {{
98
- "location": string or null,
99
- "min_price": number or null,
100
- "max_price": number or null,
101
- "bedrooms": number or null,
102
- "bathrooms": number or null,
103
- "listing_type": "rent" | "sale" | "short-stay" | "roommate" | null,
104
- "amenities": [],
105
- "user_language": "en" | "fr" | "es" | "pt" | etc.
106
- }}"""
107
-
108
-
109
- async def extract_search_params(query: str) -> dict:
110
- """Extract and normalize search parameters."""
111
- try:
112
- prompt = EXTRACTION_PROMPT.format(query=query)
113
-
114
- response = await llm.ainvoke([
115
- SystemMessage(content="Extract search params. Fix typos. Translate to English. JSON only."),
116
- HumanMessage(content=prompt)
117
- ])
118
-
119
- response_text = response.content if hasattr(response, 'content') else str(response)
120
- validation = JSONValidator.extract_and_validate(response_text)
121
-
122
- if not validation.is_valid:
123
- return {"user_language": "en"}
124
-
125
- logger.info(f"Extracted: {validation.data}")
126
- return validation.data
127
-
128
- except Exception as e:
129
- logger.error(f"Extraction error: {e}")
130
- return {"user_language": "en"}
131
 
132
 
133
  # ============================================================
@@ -155,6 +79,9 @@ async def search_mongodb(params: dict, limit: int = 10) -> list:
155
  if params.get("bedrooms"):
156
  query["bedrooms"] = {"$gte": params["bedrooms"]}
157
 
 
 
 
158
  if params.get("listing_type"):
159
  query["listing_type"] = {"$regex": params["listing_type"], "$options": "i"}
160
 
@@ -177,84 +104,6 @@ async def search_mongodb(params: dict, limit: int = 10) -> list:
177
  return []
178
 
179
 
180
- # ============================================================
181
- # QDRANT FALLBACK (Semantic, but still respect location)
182
- # ============================================================
183
-
184
- async def search_qdrant_fallback(query: str, params: dict, limit: int = 10) -> list:
185
- """Semantic search fallback - but STILL filter by location if specified."""
186
- try:
187
- logger.info("Trying Qdrant semantic fallback...")
188
- results, _ = await search_listings_hybrid(
189
- user_query=query,
190
- search_params=params,
191
- limit=limit * 3, # Get more results to filter
192
- mode="relaxed"
193
- )
194
-
195
- # IMPORTANT: Filter by location if user specified one
196
- # This prevents returning random listings from other cities
197
- location_filter = params.get("location")
198
- if location_filter and results:
199
- filtered = []
200
- for doc in results:
201
- doc_location = doc.get("location", "")
202
- # Case-insensitive partial match
203
- if location_filter.lower() in doc_location.lower():
204
- filtered.append(doc)
205
- logger.info(f"Qdrant: {len(results)} raw → {len(filtered)} after location filter")
206
- return filtered[:limit]
207
-
208
- return results[:limit]
209
- except Exception as e:
210
- logger.error(f"Qdrant fallback error: {e}")
211
- return []
212
-
213
-
214
- # ============================================================
215
- # RESPONSE GENERATOR
216
- # ============================================================
217
-
218
- async def generate_message(query: str, count: int, params: dict, used_fallback: bool) -> str:
219
- """Generate response in user's language."""
220
- try:
221
- user_lang = params.get("user_language", "en")
222
- location = params.get("location", "")
223
-
224
- # Quick templates for common cases (no LLM call needed)
225
- if count == 0:
226
- templates = {
227
- "en": "No properties found. Try adjusting your search.",
228
- "fr": "Aucune propriété trouvée. Essayez d'autres critères.",
229
- "es": "No se encontraron propiedades. Intente otra búsqueda.",
230
- "pt": "Nenhuma propriedade encontrada. Tente outros critérios."
231
- }
232
- return templates.get(user_lang, templates["en"])
233
-
234
- # Build response with LLM for natural phrasing
235
- prompt = f"""Generate a 1-sentence search result message.
236
- Count: {count} properties found
237
- Location: {location or 'search area'}
238
- Language: {user_lang}
239
-
240
- Examples:
241
- - en: "Found {count} properties in {location}!"
242
- - fr: "Voici {count} propriétés à {location} !"
243
-
244
- Write ONLY the message."""
245
-
246
- response = await llm.ainvoke([
247
- SystemMessage(content=f"Respond in {user_lang}. One sentence only."),
248
- HumanMessage(content=prompt)
249
- ])
250
-
251
- return response.content.strip()
252
-
253
- except Exception as e:
254
- logger.error(f"Message error: {e}")
255
- return f"Found {count} properties!" if count > 0 else "No properties found."
256
-
257
-
258
  # ============================================================
259
  # MAIN SEARCH ENDPOINT
260
  # ============================================================
@@ -265,33 +114,89 @@ async def aida_search(
265
  current_user: dict = Depends(get_current_user),
266
  ):
267
  """
268
- RELIABLE HYBRID SEARCH
269
-
270
- 1. Extract & normalize query (any language → English)
271
- 2. Search MongoDB first (fast, strict)
272
- 3. If no results → Qdrant fallback (semantic, fuzzy)
273
- 4. Respond in user's language
274
 
275
- If a property exists, the user WILL find it.
 
 
 
276
  """
277
 
278
- logger.info(f"AIDA Search: {dto.query}")
279
 
280
  try:
281
- # Step 1: Extract parameters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  search_params = await extract_search_params(dto.query)
283
 
284
- # Step 2: Try MongoDB first (fast)
285
- results = await search_mongodb(search_params, dto.limit)
286
- used_fallback = False
287
 
288
- # Step 3: If no results, try Qdrant (semantic fallback)
289
- if not results:
290
- logger.info("MongoDB found nothing, trying Qdrant fallback...")
291
- results = await search_qdrant_fallback(dto.query, search_params, dto.limit)
292
- used_fallback = True
293
 
294
- # Step 4: Format listings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  formatted_listings = []
296
  for doc in results:
297
  if "_id" in doc and not isinstance(doc["_id"], str):
@@ -300,6 +205,7 @@ async def aida_search(
300
  # Clean up internal fields
301
  doc.pop("_relevance_score", None)
302
  doc.pop("_is_suggestion", None)
 
303
  doc.pop("location_lower", None)
304
  doc.pop("listing_type_lower", None)
305
 
@@ -310,21 +216,28 @@ async def aida_search(
310
  logger.warning(f"Format warning: {e}")
311
  formatted_listings.append(doc)
312
 
313
- # Step 5: Generate response in user's language
314
- message = await generate_message(dto.query, len(formatted_listings), search_params, used_fallback)
 
 
 
 
 
 
315
 
316
- logger.info(f"Search complete: {len(formatted_listings)} results (fallback: {used_fallback})")
317
 
318
  return SearchResponseDto(
319
  success=True,
320
  message=message,
321
  data=formatted_listings,
322
  total=len(formatted_listings),
323
- search_params=search_params
 
324
  )
325
 
326
  except Exception as e:
327
- logger.error(f"Search error: {e}")
328
  raise HTTPException(
329
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
330
  detail=f"Search failed: {str(e)}"
 
1
  # ============================================================
2
+ # app/routes/search.py - AIDA Intelligent Search System
3
  # ============================================================
4
  """
5
+ SUPER INTELLIGENT SEARCH:
6
+ 1. Detects if input is a search query or casual conversation
7
+ 2. Intelligently chooses optimal search strategy (MongoDB, Qdrant, or combinations)
8
+ 3. Generates natural, conversational responses mentioning specific amenities
9
+ 4. Handles non-search inputs gracefully
 
10
 
11
+ Goal: Provide an exceptional search experience with smart strategy selection.
12
  """
13
 
14
  import logging
 
20
  from app.models.listing import Listing
21
  from app.guards.jwt_guard import get_current_user
22
 
23
+ # Import shared AI services
24
+ from app.ai.services.search_intent_classifier import classify_search_intent
25
+ from app.ai.services.search_strategy_selector import select_search_strategy, SearchStrategy
26
+ from app.ai.services.search_extractor import extract_search_params
27
+ from app.ai.services.search_responder import generate_natural_response, generate_non_search_response
28
+ from app.ai.services.search_service import (
29
+ search_listings_hybrid,
30
+ search_mongo_then_qdrant,
31
+ search_qdrant_then_mongo,
32
+ )
33
 
34
  router = APIRouter(tags=["AIDA Search"])
35
 
36
  logger = logging.getLogger(__name__)
37
 
 
 
 
 
 
 
 
 
38
 
39
  # ============================================================
40
  # SCHEMAS
 
51
  data: List[dict]
52
  total: int
53
  search_params: Optional[dict] = None
54
+ strategy_used: Optional[str] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  # ============================================================
 
79
  if params.get("bedrooms"):
80
  query["bedrooms"] = {"$gte": params["bedrooms"]}
81
 
82
+ if params.get("bathrooms"):
83
+ query["bathrooms"] = {"$gte": params["bathrooms"]}
84
+
85
  if params.get("listing_type"):
86
  query["listing_type"] = {"$regex": params["listing_type"], "$options": "i"}
87
 
 
104
  return []
105
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  # ============================================================
108
  # MAIN SEARCH ENDPOINT
109
  # ============================================================
 
114
  current_user: dict = Depends(get_current_user),
115
  ):
116
  """
117
+ SUPER INTELLIGENT SEARCH
 
 
 
 
 
118
 
119
+ 1. Detects if input is a search query or casual conversation
120
+ 2. Intelligently chooses optimal strategy (MongoDB, Qdrant, combinations)
121
+ 3. Generates natural responses mentioning specific amenities
122
+ 4. Handles non-search inputs gracefully
123
  """
124
 
125
+ logger.info(f"AIDA Search Request: {dto.query}")
126
 
127
  try:
128
+ # Step 1: Classify intent - is this a search query?
129
+ intent_result = await classify_search_intent(dto.query)
130
+
131
+ if not intent_result.get("is_search", False):
132
+ # Not a search query - handle gracefully
133
+ logger.info(f"Non-search input detected: {intent_result.get('reason')}")
134
+
135
+ # Extract language for response
136
+ params = await extract_search_params(dto.query)
137
+ user_lang = params.get("user_language", "en")
138
+
139
+ message = await generate_non_search_response(dto.query, user_lang)
140
+
141
+ return SearchResponseDto(
142
+ success=True,
143
+ message=message,
144
+ data=[],
145
+ total=0,
146
+ search_params={"is_search": False, "confidence": intent_result.get("confidence")},
147
+ strategy_used="NON_SEARCH"
148
+ )
149
+
150
+ # Step 2: Extract search parameters
151
  search_params = await extract_search_params(dto.query)
152
 
153
+ # Step 3: Select optimal search strategy
154
+ strategy_result = await select_search_strategy(dto.query, search_params)
155
+ strategy = strategy_result.get("strategy", SearchStrategy.MONGO_ONLY)
156
 
157
+ logger.info(f"Strategy selected: {strategy} - {strategy_result.get('reasoning')}")
 
 
 
 
158
 
159
+ # Step 4: Execute search based on strategy
160
+ results = []
161
+
162
+ if strategy == SearchStrategy.MONGO_ONLY:
163
+ results = await search_mongodb(search_params, dto.limit)
164
+
165
+ # Fallback for structured searches if NO results
166
+ if not results:
167
+ logger.info("MONGO_ONLY yielded no results, falling back to QDRANT_ONLY as safety net")
168
+ from app.ai.services.search_service import search_listings_hybrid
169
+ results, _ = await search_listings_hybrid(
170
+ user_query=dto.query,
171
+ search_params=search_params,
172
+ limit=dto.limit,
173
+ mode="relaxed"
174
+ )
175
+ strategy = "QDRANT_FALLBACK"
176
+
177
+ elif strategy == SearchStrategy.QDRANT_ONLY:
178
+ results, _ = await search_listings_hybrid(
179
+ user_query=dto.query,
180
+ search_params=search_params,
181
+ limit=dto.limit,
182
+ mode="relaxed"
183
+ )
184
+
185
+ elif strategy == SearchStrategy.MONGO_THEN_QDRANT:
186
+ results, _ = await search_mongo_then_qdrant(
187
+ user_query=dto.query,
188
+ search_params=search_params,
189
+ limit=dto.limit
190
+ )
191
+
192
+ elif strategy == SearchStrategy.QDRANT_THEN_MONGO:
193
+ results, _ = await search_qdrant_then_mongo(
194
+ user_query=dto.query,
195
+ search_params=search_params,
196
+ limit=dto.limit
197
+ )
198
+
199
+ # Step 5: Format listings
200
  formatted_listings = []
201
  for doc in results:
202
  if "_id" in doc and not isinstance(doc["_id"], str):
 
205
  # Clean up internal fields
206
  doc.pop("_relevance_score", None)
207
  doc.pop("_is_suggestion", None)
208
+ doc.pop("_search_strategy", None)
209
  doc.pop("location_lower", None)
210
  doc.pop("listing_type_lower", None)
211
 
 
216
  logger.warning(f"Format warning: {e}")
217
  formatted_listings.append(doc)
218
 
219
+ # Step 6: Generate natural response
220
+ message = await generate_natural_response(
221
+ user_query=dto.query,
222
+ count=len(formatted_listings),
223
+ params=search_params,
224
+ listings=formatted_listings,
225
+ strategy_used=strategy
226
+ )
227
 
228
+ logger.info(f"Search complete: {len(formatted_listings)} results via {strategy}")
229
 
230
  return SearchResponseDto(
231
  success=True,
232
  message=message,
233
  data=formatted_listings,
234
  total=len(formatted_listings),
235
+ search_params=search_params,
236
+ strategy_used=strategy
237
  )
238
 
239
  except Exception as e:
240
+ logger.error(f"Search error: {e}", exc_info=True)
241
  raise HTTPException(
242
  status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
243
  detail=f"Search failed: {str(e)}"
test_intelligent_search.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # test_intelligent_search.py
2
+ """
3
+ Test suite for the intelligent AIDA search system.
4
+ """
5
+
6
+ import asyncio
7
+ import httpx
8
+ import json
9
+
10
+ # Base URL - adjust if needed
11
+ BASE_URL = "http://localhost:8000"
12
+
13
+ # You'll need a valid JWT token
14
+ JWT_TOKEN = "YOUR_JWT_TOKEN_HERE"
15
+
16
async def test_search(query: str, description: str):
    """Send one query to the search endpoint and print a readable report.

    Posts ``query`` (with a limit of 5) to ``{BASE_URL}/api/search/`` using the
    module-level ``JWT_TOKEN`` as a bearer token. On HTTP 200 it prints the
    message, strategy, result count, extracted parameters and the first
    listing; on any other status it prints the status and raw body. Any
    exception is caught and printed rather than propagated.
    """
    banner = f"{'='*70}"
    print(f"\n{banner}")
    print(f"TEST: {description}")
    print(f"Query: {query}")
    print(banner)

    async with httpx.AsyncClient() as client:
        try:
            response = await client.post(
                f"{BASE_URL}/api/search/",
                json={"query": query, "limit": 5},
                headers={"Authorization": f"Bearer {JWT_TOKEN}"},
                timeout=30.0,
            )

            # Guard clause: report a failed request and stop here.
            if response.status_code != 200:
                print(f"❌ FAILED: Status {response.status_code}")
                print(f"Response: {response.text}")
                return

            payload = response.json()
            print("✅ SUCCESS")
            print(f"Message: {payload.get('message')}")
            print(f"Strategy Used: {payload.get('strategy_used')}")
            print(f"Results: {payload.get('total')} properties")

            extracted = payload.get('search_params')
            if extracted:
                print(f"Extracted Params: {json.dumps(extracted, indent=2)}")

            # Show first property if available
            listings = payload.get('data')
            if listings and len(listings) > 0:
                first = listings[0]
                print("\nFirst Property:")
                print(f" - Title: {first.get('title')}")
                print(f" - Location: {first.get('location')}")
                print(f" - Price: {first.get('price')} {first.get('currency')}")
                print(f" - Bedrooms: {first.get('bedrooms')}")
                tags = first.get('amenities')
                if tags:
                    print(f" - Amenities: {', '.join(tags[:5])}")

        except Exception as e:
            print(f"❌ ERROR: {str(e)}")
58
+
59
+
60
async def main():
    """Run all search tests sequentially against the live endpoint."""
    rule = "=" * 70

    print("\n" + rule)
    print("INTELLIGENT SEARCH SYSTEM - TEST SUITE")
    print(rule)

    # (query, description) pairs, executed in this order.
    scenarios = [
        # Test 1: Non-search input (greeting)
        ("hello", "Non-search input: Greeting"),
        # Test 2: Non-search input (thanks)
        ("thank you so much", "Non-search input: Thank you"),
        # Test 3: Structured query - should use MONGO_ONLY
        (
            "3 bedroom apartment in Cotonou under 200k",
            "Structured query (expect: MONGO_ONLY strategy)",
        ),
        # Test 4: Semantic query - should use QDRANT_ONLY
        (
            "cozy modern apartment with good vibes",
            "Semantic query (expect: QDRANT_ONLY strategy)",
        ),
        # Test 5: Hybrid query - should use MONGO_THEN_QDRANT
        (
            "house close to international school in Calavi",
            "Hybrid query with location + context (expect: MONGO_THEN_QDRANT)",
        ),
        # Test 6: Amenity-focused query
        (
            "apartment with balcony and swimming pool in Cotonou",
            "Amenity-focused query",
        ),
        # Test 7: French query
        ("maison avec piscine à Calavi", "French query (expect: French response)"),
        # Test 8: Query with typos
        (
            "apartmnt in cotonoo with parkng",
            "Query with typos (expect: typo correction)",
        ),
        # Test 9: Price range query
        ("houses under 150k in Lagos", "Price range query"),
        # Test 10: Vague semantic query
        (
            "family-friendly neighborhood quiet area",
            "Vague semantic query (expect: QDRANT_ONLY)",
        ),
    ]

    for sample_query, label in scenarios:
        await test_search(sample_query, label)

    print("\n" + rule)
    print("TEST SUITE COMPLETE")
    print(rule)
    print("\nNOTE: Update JWT_TOKEN at the top of this file with a valid token")
    print("Run this script with: python test_intelligent_search.py")
132
+
133
+
134
+ if __name__ == "__main__":
135
+ asyncio.run(main())