File size: 16,049 Bytes
9b1bb35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56bf5b7
 
 
 
 
 
9b1bb35
56bf5b7
 
 
9b1bb35
1d37dfb
e65d77f
56bf5b7
 
 
 
 
206abfa
56bf5b7
 
 
 
 
 
 
 
 
 
 
 
206abfa
 
 
 
 
56bf5b7
 
206abfa
56bf5b7
 
 
 
206abfa
 
 
 
 
 
 
 
56bf5b7
 
 
 
 
 
8699593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56bf5b7
 
 
206abfa
67df737
56bf5b7
 
 
 
 
 
 
 
 
 
 
 
 
 
8699593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56bf5b7
 
 
46e45ce
67df737
56bf5b7
 
 
 
 
 
 
 
 
 
 
9b1bb35
9d3ad1f
 
8699593
90d8ff6
8699593
 
 
 
90d8ff6
 
 
 
 
 
 
8699593
90d8ff6
 
 
 
 
8699593
90d8ff6
 
 
 
8699593
9d3ad1f
 
 
 
 
 
 
 
 
 
 
 
 
9b1bb35
9d3ad1f
 
 
 
9b1bb35
 
 
9d3ad1f
 
 
 
 
 
 
 
 
9b1bb35
56bf5b7
 
 
 
 
 
 
 
 
 
 
 
9b1bb35
6b7a40f
56bf5b7
 
 
 
130d5d6
 
 
 
 
 
 
 
 
 
56bf5b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
# import logging
# from langchain_core.tools import tool
# from langchain_core.documents import Document
# from langchain_huggingface import HuggingFaceEmbeddings
# from serialization import convert_to_serializable
# from typing import List
# # Set up logging
# logging.basicConfig(level=logging.INFO)
# logger = logging.getLogger(__name__)
# import os
# os.environ["HF_HOME"] = "/app/.cache"

# # Initialize embeddings
# embedmodel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# # Raw vector search function
# def raw_vector_search(collection, query: str, index_name: str, k: int = 10) -> List[Document]:
#     try:
#         query_embedding = embedmodel.embed_query(query)
#         pipeline = [
#             {
#                 "$vectorSearch": {
#                     "index": index_name,
#                     "path": "revoemb",
#                     "queryVector": query_embedding,
#                     "numCandidates": k * 10,
#                     "limit": k
#                 }
#             },
#             {
#                 "$project": {
#                     "revoemb": 0,
#                     "score": {"$meta": "vectorSearchScore"}
#                 }
#             }
#         ]
#         results = list(collection.aggregate(pipeline))
#         return [Document(
#             page_content=r.get("description", ""),
#             metadata={k: v for k, v in r.items() if k != "description"},
#             score=r.get("score", 0)
#         ) for r in results]
#     except Exception as e:
#         logger.error("Vector search error: %s", str(e))
#         return []

# # Define tools
# @tool
# def properties_vector_search(query: str, properties_collection=None) -> List[dict]:
#     """Search for real estate properties based on a query."""
#     try:
#         if properties_collection is None:
#             raise ValueError("Properties collection not provided")
#         results = raw_vector_search(properties_collection, query, "properties_vector_index")
#         logger.info("Properties query: %s, results: %d", query, len(results))
#         return [
#             {
#                 "content": r.page_content,
#                 "metadata": convert_to_serializable(r.metadata),
#                 "score": r.metadata.get("score", 0)
#             }
#             for r in results
#         ]
#     except Exception as e:
#         logger.error("Properties search error: %s", str(e))
#         return []

# @tool
# def companies_vector_search(query: str, companies_collection=None) -> List[dict]:
#     """Search for real estate companies based on a query."""
#     try:
#         if companies_collection is None:
#             raise ValueError("Companies collection not provided")
#         results = raw_vector_search(companies_collection, query, "companies_vector_index")
#         logger.info("Companies query: %s, results: %d", query, len(results))
#         return [
#             {
#                 "content": r.page_content,
#                 "metadata": convert_to_serializable(r.metadata),
#                 "score": r.metadata.get("score", 0)
#             }
#             for r in results
#         ]
#     except Exception as e:
#         logger.error("Companies search error: %s", str(e))
#         return []
# @tool
# def revoestate_information(query: str, revoestate_collection=None) -> List[dict]:
#     """Search for Revoestate information based on a query and get information about the system."""

#     try:
#         if revoestate_collection is None:
#             raise ValueError("Revoestate collection not provided")
#         query_embedding = embedmodel.embed_query(query)
#         index_name = "revoinformation_vector_index"

#         pipeline = [
#             {
#                 "$vectorSearch": {
#                     "index": index_name,
#                     "queryVector": query_embedding,
#                     "path": "revoemb",
#                     "limit": 5,
#                     "numCandidates": 100  # Added required parameter for approximate search
#                 }
#             },
#             {
#                 "$project": {
#                     "_id": 0,
#                     "text": 1,
#                     "revoemb": 0,
#                     "score": {"$meta": "vectorSearchScore"}  # Include relevance score
#                 }
#             }
#         ]

#         results = revoestate_collection.aggregate(pipeline)
#         return list(results)
#     except Exception as e:
#         logger.error("Revoestate search error: %s", str(e))
#         return []
# async def get_properties_by_context(query: str, properties_collection=None) -> List[dict]:
#     """Get properties by context."""
#     try:
#         if properties_collection is None:
#             raise ValueError("Properties collection not provided")
#         query_embedding = embedmodel.embed_query(query)
#         pipeline = [
#             {
#                 "$vectorSearch": {
#                     "index": "properties_vector_index",
#                     "path": "revoemb",
#                     "queryVector": query_embedding,
#                     "numCandidates":  100,
#                     "limit": 10
#                 }
#             },
#             {
#                 "$project": {
#                     "revoemb": 0,
#                     "score": {"$meta": "vectorSearchScore"}
#                 }
#             }
#         ]
#         results = list(properties_collection.aggregate(pipeline))
        
#         # Convert ObjectId fields to strings
#         for result in results:
#             if '_id' in result:
#                 result['_id'] = str(result['_id'])
#             if 'companyId' in result:
#                 result['companyId'] = str(result['companyId'])
#             if 'userId' in result:
#                 result['userId'] = str(result['userId'])
#             # Add other ObjectId fields as needed (e.g., purchaseId)
#             if 'purchaseId' in result:
#                 result['purchaseId'] = str(result['purchaseId'])
        
#         logger.info("Properties by context query: %s, results: %d", query, len(results))
#         return results
#     except Exception as e:
#         logger.error("Properties by context error: %s", str(e))
#         return []
import logging
from typing import List, Optional

from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain_huggingface import HuggingFaceEmbeddings

from serialization import convert_to_serializable

# Module-level setup. Everything below runs once, at import time.

# Configure root logging at INFO and create this module's logger, which every
# function in this file uses for both informational and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Point the Hugging Face cache at /app/.cache before the embedding model is
# instantiated below.
# NOTE(review): this assignment happens after `langchain_huggingface` has
# already been imported; if any Hugging Face library reads HF_HOME at import
# time, the value set here may be ignored -- confirm, or move this above the
# imports.
import os
os.environ["HF_HOME"] = "/app/.cache"

# Shared embedding model used to turn query text into the vector passed to
# every `$vectorSearch` stage in this module.
embedmodel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Raw vector search function
def raw_vector_search(
    collection,
    query: str,
    index_name: str,
    exclude_fields: Optional[List[str]] = None,
    k: int = 10,
) -> List[Document]:
    """Run a ``$vectorSearch`` aggregation and wrap each hit in a ``Document``.

    The query text is embedded with the module-level ``embedmodel`` and matched
    against the ``revoemb`` field using the given search index. Each hit gets a
    ``score`` field holding the ``vectorSearchScore`` metadata.

    Args:
        collection: Object exposing ``aggregate(pipeline)`` (presumably a
            PyMongo collection -- confirm against callers).
        query: Free-text search query to embed.
        index_name: Name of the vector search index to use.
        exclude_fields: Field names to drop from each hit. ``None`` or an
            empty list means "drop nothing".
        k: Maximum number of hits; ``numCandidates`` is set to ``k * 10``.

    Returns:
        One ``Document`` per hit: ``page_content`` is the hit's
        ``description`` (empty string when missing or null) and ``metadata``
        holds every other field, including ``score``. Any exception is logged
        and results in an empty list instead of being raised.
    """
    try:
        query_embedding = embedmodel.embed_query(query)
        pipeline = [
            {
                "$vectorSearch": {
                    "index": index_name,
                    "path": "revoemb",
                    "queryVector": query_embedding,
                    "numCandidates": k * 10,
                    "limit": k,
                }
            },
            {
                "$addFields": {
                    "score": {"$meta": "vectorSearchScore"}
                }
            },
        ]
        # MongoDB rejects a $project stage with an empty specification, so the
        # stage is only appended when there is at least one field to exclude.
        # Previously a call without `exclude_fields` always failed and
        # returned [].
        if exclude_fields:
            pipeline.append({"$project": {field: 0 for field in exclude_fields}})

        results = list(collection.aggregate(pipeline))
        return [
            Document(
                # `or ""` also covers a stored null description, which would
                # otherwise fail Document construction and discard every hit.
                page_content=row.get("description") or "",
                metadata={
                    key: value for key, value in row.items() if key != "description"
                },
                # The score is also available to callers via metadata["score"].
                score=row.get("score", 0),
            )
            for row in results
        ]
    except Exception as e:
        logger.error("Vector search error: %s", str(e))
        return []
# Define tools
@tool
def properties_vector_search(query: str, properties_collection=None) -> List[dict]:
    """Search for real estate properties in Addis Ababa, Ethiopia, based on a user query.

    This tool searches a collection of properties including homes, apartments, villas, condos, and more.
    It returns detailed information such as title, price, location (with subcity/district and coordinates if available),
    specifications (bedrooms, bathrooms, area, built year), amenities, and descriptions.

    Args:
        query (str): The user's search query (e.g., "apartments in Bole" or "villas with 3 bedrooms").
        properties_collection: The database collection containing property data (defaults to None).

    Returns:
        List[dict]: A list of dictionaries, each containing:
            - content (str): The property description.
            - metadata (dict): Property details (e.g., price, location, bedrooms).
            - score (float): Relevance score of the match.
        An empty list is returned when properties_collection is not provided or
        the search fails; the error is logged rather than raised.
    """
    try:
        if properties_collection is None:
            raise ValueError("Properties collection not provided")
        # Image fields and the raw embedding vector are dropped from the hits.
        results = raw_vector_search(
            properties_collection,
            query,
            "properties_vector_index",
            exclude_fields=["images", "panoramicImages", "revoemb"],
        )
        # The original call passed three arguments to a two-placeholder format
        # string, which makes the logging module report a formatting error and
        # drop the record. The count stays at INFO; the payload goes to DEBUG.
        logger.info("Properties query: %s, results: %d", query, len(results))
        logger.debug("Properties results: %s", results)
        return [
            {
                "content": r.page_content,
                "metadata": convert_to_serializable(r.metadata),
                "score": r.metadata.get("score", 0),
            }
            for r in results
        ]
    except Exception as e:
        logger.error("Properties search error: %s", str(e))
        return []

@tool
def companies_vector_search(query: str, companies_collection=None) -> List[dict]:
    """Search for real estate companies in Addis Ababa, Ethiopia, based on a user query.

    This tool retrieves information about real estate agencies or companies, including their name,
    services offered, contact details (phone, email, website), physical address, years in operation,
    and specializations. Use this when the user explicitly asks about a company (e.g., "Tell me about ABC Realty").

    Args:
        query (str): The user's search query (e.g., "real estate companies in Addis" or "ABC Realty details").
        companies_collection: The database collection containing company data (defaults to None).

    Returns:
        List[dict]: A list of dictionaries, each containing:
            - content (str): The company description.
            - metadata (dict): Company details (e.g., contact info, services).
            - score (float): Relevance score of the match.
        An empty list is returned when companies_collection is not provided or
        the search fails; the error is logged rather than raised.
    """
    try:
        if companies_collection is None:
            raise ValueError("Companies collection not provided")
        # The raw embedding vector and document/image URLs are dropped from the hits.
        results = raw_vector_search(
            companies_collection,
            query,
            "companies_vector_index",
            exclude_fields=["revoemb", "documentUrl", "imageUrl"],
        )
        # The original call passed three arguments to a two-placeholder format
        # string, which makes the logging module report a formatting error and
        # drop the record. The count stays at INFO; the payload goes to DEBUG.
        logger.info("Companies query: %s, results: %d", query, len(results))
        logger.debug("Companies results: %s", results)
        return [
            {
                "content": r.page_content,
                "metadata": convert_to_serializable(r.metadata),
                "score": r.metadata.get("score", 0),
            }
            for r in results
        ]
    except Exception as e:
        logger.error("Companies search error: %s", str(e))
        return []

@tool
def revoestate_information(query: str, revoestate_collection=None) -> List[dict]:
    """Search for information about the Revoestate platform based on a user query.

    This tool provides details about Revoestate, including its mission, services (e.g., property listings,
    company profiles), role in Ethiopian real estate, how to use the platform (e.g., listing properties,
    searching for homes), and contact information. Use this for queries like "What is Revoestate?",
    "How do I use this website?", or "What services does Revoestate offer?".

    **Args:**
        query (str): The user's query about Revoestate (e.g., "What is Revoestate?" or "How to list a property or who are founders of this platform").
        revoestate_collection: The database collection containing Revoestate data. This parameter is required
            for the search to execute.

    **Returns:**
        List[dict]: A list of dictionaries, each containing:
            - 'text' (str): Information about Revoestate.
            - 'score' (float): Relevance score of the match.

    **Raises:**
        ValueError: If `revoestate_collection` is not provided.
        Exception: If the search fails due to database or processing errors.

    **Note:**
        This tool is designed to be integrated into chatbots or APIs to provide real-time information
        about Revoestate based on user queries.
    """
    # NOTE(review): the docstring is kept verbatim because the @tool decorator
    # presumably exposes it as the tool description. Its "Raises" section does
    # not match the code below, which logs every exception and returns [].
    try:
        if revoestate_collection is None:
            raise ValueError("Revoestate collection not provided")

        # Embed the question, then look up the five closest entries.
        search_stage = {
            "$vectorSearch": {
                "index": "revoinformation_vector_index",
                "queryVector": embedmodel.embed_query(query),
                "path": "revoemb",
                "limit": 5,
                "numCandidates": 100,
            }
        }
        # Keep only the text and its relevance score; _id is dropped explicitly.
        projection_stage = {
            "$project": {
                "_id": 0,
                "text": 1,
                "score": {"$meta": "vectorSearchScore"},
            }
        }
        return list(revoestate_collection.aggregate([search_stage, projection_stage]))
    except Exception as e:
        logger.error("Revoestate search error: %s", str(e))
        return []

async def get_properties_by_context(query: str, properties_collection=None) -> List[dict]:
    """Return up to six properties whose embeddings best match ``query``.

    The query is embedded with the module-level ``embedmodel`` and searched
    against the ``properties_vector_index`` index on the ``revoemb`` field.
    Each hit is returned as a plain dict without ``revoemb``, with a ``score``
    taken from the ``vectorSearchScore`` metadata, and with its ``_id``,
    ``companyId``, ``userId`` and ``purchaseId`` values (presumably BSON
    ObjectIds) converted to strings when present.

    Any failure, including a missing ``properties_collection``, is logged and
    produces an empty list.

    NOTE(review): nothing in the body is awaited, so the embedding and the
    aggregation run synchronously inside this coroutine.
    """
    id_like_keys = ("_id", "companyId", "userId", "purchaseId")
    try:
        if properties_collection is None:
            raise ValueError("Properties collection not provided")

        search_stage = {
            "$vectorSearch": {
                "index": "properties_vector_index",
                "path": "revoemb",
                "queryVector": embedmodel.embed_query(query),
                "numCandidates": 100,
                "limit": 6,
            }
        }
        # Drop the raw embedding and attach the relevance score.
        projection_stage = {
            "$project": {
                "revoemb": 0,
                "score": {"$meta": "vectorSearchScore"},
            }
        }
        matches = list(properties_collection.aggregate([search_stage, projection_stage]))

        # Stringify identifier fields in place so the dicts are easy to serialize.
        for match in matches:
            for key in id_like_keys:
                if key in match:
                    match[key] = str(match[key])

        logger.info("Properties by context query: %s, results: %d", query, len(matches))
        return matches
    except Exception as e:
        logger.error("Properties by context error: %s", str(e))
        return []