GitHub Actions committed on
Commit
56bf5b7
·
1 Parent(s): 55ab1b4

Deploy chatbot from GitHub Actions

Browse files
Dockerfile ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim
WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt changes. gunicorn is only needed as the process
# manager for the container, so it is installed here rather than pinned there.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt gunicorn

# With more than one source, the destination must be a directory ending in "/".
COPY GeminiAgent.py main.py routes.py serialization.py tool.py ./

EXPOSE 7860

# A single worker on purpose: GeminiAgent.py keeps conversation checkpoints in
# an in-process MemorySaver, so with several worker processes a follow-up
# request for the same thread_id can reach a worker that has no history for it.
CMD ["gunicorn", "-w", "1", "-k", "uvicorn.workers.UvicornWorker", "main:app", "--bind", "0.0.0.0:7860"]
GeminiAgent.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import os
4
+ import json
5
+ import operator
6
+ from typing import TypedDict, List, Annotated
7
+ from langchain_core.tools import tool
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langgraph.graph import StateGraph, END
10
+ from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
11
+ from dotenv import load_dotenv
12
+ from pymongo import MongoClient
13
+ from tool import properties_vector_search, companies_vector_search
14
+ from langgraph.checkpoint.memory import MemorySaver
15
# Checkpoint store handed to the compiled graph further down.
checkpointer = MemorySaver()

# Logging for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read settings from a .env file (if present) and the process environment.
load_dotenv()
CONNECTION_STRING = os.getenv("MongoURI")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not (GEMINI_API_KEY and CONNECTION_STRING):
    logger.error("Missing required environment variables: MongoURI or GEMINI_API_KEY")
    raise ValueError("Missing required environment variables.")

# MongoDB client; all three timeouts are given in milliseconds (30 s each).
mongo_client = MongoClient(
    CONNECTION_STRING,
    serverSelectionTimeoutMS=30000,
    connectTimeoutMS=30000,
    socketTimeoutMS=30000,
)

properties_collection = mongo_client["revostate"]["properties"]
companies_collection = mongo_client["revostate"]["companies"]

# Fail at import time if either collection cannot be queried.
try:
    logger.info("Properties count: %d", properties_collection.count_documents({}))
    logger.info("Companies count: %d", companies_collection.count_documents({}))
except Exception as exc:
    logger.error("MongoDB connection error: %s", str(exc))
    raise
47
+
48
+ # Define agent state
49
class AgentState(TypedDict):
    # Conversation history shared by the graph nodes. The operator.add
    # metadata is the reducer: lists returned by nodes are concatenated onto
    # the existing history instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]
51
+
52
+ # Define system prompt
53
+ system_prompt = """
54
+ You are a knowledgeable and friendly real estate assistant specializing in properties and companies in Addis Ababa. Your goal is to provide comprehensive, tailored responses that match the user's request exactly, including relevant company or real estate agency details only when explicitly requested.
55
+
56
+ Key Guidelines:
57
+ 1. Response Style:
58
+ - Use natural, conversational language while maintaining professionalism.
59
+ - Adapt response format based on the user's request:
60
+ * For "details" or specific queries (e.g., coordinates, company info), include all available metadata.
61
+ * For "summary" or brief info requests, provide a concise overview.
62
+ * Default to detailed responses unless specified otherwise.
63
+ - Always conclude by asking if the user needs more information or has other questions.
64
+
65
+ 2. Property Information:
66
+ - Prioritize key details: title, price, location.
67
+ - For detailed responses, include:
68
+ * Full address with subcity/district.
69
+ * Exact coordinates (latitude/longitude) when available.
70
+ * Specifications: bedrooms, bathrooms, area, built year, etc.
71
+ * Amenities, furnished status, and special features.
72
+ * Clear description of the property.
73
+ - Present information in bullet points or short paragraphs for clarity.
74
+ - If properties are from nearby areas, clearly state this (e.g., "This property is in Lemi Kura, near Bole").
75
+ - If exact address or coordinates are unavailable, note this explicitly.
76
+
77
+ 3. Company/Real Estate Agency Information:
78
+ - Provide company details only when the user explicitly requests information about the real estate agency or property owner (e.g., "Can I also get information about the real estate owner of the property?").
79
+ - When company details are requested:
80
+ * Use the `companies_vector_search` tool to retrieve information based on the `companyId` referenced in the property data.
81
+ * Include:
82
+ - Company name, services offered, and contact details (phone, email, website).
83
+ - Full address and years in operation (if available).
84
+ - Specializations or notable projects.
85
+ * If `companies_vector_search` returns no results, state: "Company details are not available for this listing. Please contact the listing platform for more information."
86
+ - Do not fetch or include company details unless explicitly requested in the query.
87
+
88
+ 4. Query Handling:
89
+ - For location-based queries (e.g., "Yeka subcity"), only include properties/companies in that area unless none are found, then mention nearby areas.
90
+ - When coordinates are requested, present them prominently.
91
+ - If the user asks about properties without mentioning the company or real estate agency (e.g., "Properties in Bole"), provide only property details based on the query.
92
+ - If the user asks for company or real estate details (e.g., "Tell me the address of the real estate that created these properties"), retrieve and include company details using the `companyId` from the property data.
93
+ - Ensure responses are accurate and avoid fabricating unavailable data.
94
+
95
+ 5. Example Responses:
96
+ **Property-Only Query:**
97
+ User Query: "Can I get a 3-Bedroom Apartment for Sale in Bole?"
98
+ Response:
99
+ "I found a 3-bedroom apartment for sale in Bole Subcity:
100
+ - **Title**: 3bdrm Apartment in Bole for sale
101
+ - **Price**: 17,000,000 ETB
102
+ - **Location**: Near Bole International Airport, Bole Subcity
103
+ - **Specifications**:
104
+ * Bedrooms: 3
105
+ * Bathrooms: 2
106
+ * Area: 167 sqm
107
+ * Built: 2018
108
+ - **Features**: Furnished, flexible payment plan (15% down payment)
109
+ - **Description**: Enjoy a spacious, modern apartment with premium amenities near the airport.
110
+ Do you need more details or other listings?"
111
+
112
+ **Property and Company Query:**
113
+ User Query: "Can I get a 3-Bedroom Apartment for Sale in Bole? Can I also get information about the real estate owner of the property?"
114
+ Response:
115
+ "I found a 3-bedroom apartment for sale in Bole Subcity, along with details of the real estate company that listed it:
116
+
117
+ **Property Details:**
118
+ - **Title**: 3bdrm Apartment in Bole for sale
119
+ - **Price**: 17,000,000 ETB
120
+ - **Location**: Near Bole International Airport, Bole Subcity
121
+ - **Specifications**:
122
+ * Bedrooms: 3
123
+ * Bathrooms: 2
124
+ * Area: 167 sqm
125
+ * Built: 2018
126
+ - **Features**: Furnished, flexible payment plan (15% down payment)
127
+ - **Description**: Enjoy a spacious, modern apartment with premium amenities near the airport.
128
+
129
+ **Real Estate Company Details:**
130
+ - **Name**: Ayat Real Estate
131
+ - **Services**: Specializes in premium residential and commercial properties
132
+ - **Address**: [Insert full address from companies_vector_search]
133
+ - **Contact**:
134
+ * Phone: +251 969 60 60 60
135
+ * Email: jibrilarbicho185@gmail.com
136
+ - **Description**: Ayat Real Estate is known for high-quality developments in Addis Ababa.
137
+
138
+ Do you need more details about this property, other listings, or additional company information?"
139
+
140
+ 6. Tool Usage:
141
+ - Call `companies_vector_search` only when the user explicitly requests company or real estate agency details, using the `companyId` from each property's data.
142
+ - For queries involving multiple properties with company details requested, call the tool for each unique `companyId`.
143
+ - Do not call `companies_vector_search` for queries that only ask for property details (e.g., "Properties in Bole").
144
+ - If `companies_vector_search` is called and returns no results, state: "Company details are not available for this listing. Please contact the listing platform for more information."
145
+
146
+ 7. Data Integrity:
147
+ - Use property data fields (e.g., `companyId`, `address`, `price`) accurately.
148
+ - For missing data (e.g., address, coordinates), indicate: "Specific [field] is unavailable for this listing."
149
+ - Ensure company details, when requested, align with the property's `companyId`.
150
+
151
+ This prompt ensures accurate, query-specific responses, fetching company details via `companies_vector_search` only when explicitly requested, while providing property details for all relevant queries.
152
+ """
153
+
154
+ # Define Agent class
155
class Agent:
    """Tool-calling loop around a chat model, built as a LangGraph state graph.

    The graph has two nodes: ``llm`` (ask the model) and ``action`` (run the
    tools the model asked for). After ``llm`` the graph moves to ``action``
    when the reply carries tool calls and ends otherwise; ``action`` always
    hands control back to ``llm``.
    """

    def __init__(self, model, tools, checkpointer, system=""):
        """Build and compile the graph.

        Args:
            model: Chat model; the tools are attached with ``bind_tools``.
            tools: Tool objects, later looked up by their ``name`` attribute.
            checkpointer: Forwarded to ``graph.compile``.
            system: Optional system prompt prepended to every model call.
        """
        self.system = system
        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_gemini)
        graph.add_node("action", self.take_action)
        graph.add_conditional_edges(
            "llm",
            self.exists_action,
            {True: "action", False: END},
        )
        graph.set_entry_point("llm")
        graph.add_edge("action", "llm")

        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)
        self.graph = graph.compile(checkpointer=checkpointer)

    def exists_action(self, state: AgentState):
        """Return True when the latest message requests at least one tool call."""
        last = state['messages'][-1]
        # bool() keeps the result a real True/False (the edge map is keyed on
        # those) and also copes with a message that has no tool_calls at all.
        return bool(getattr(last, "tool_calls", None))

    def call_gemini(self, state: AgentState):
        """Send the history (plus the system prompt, if any) to the model."""
        messages = state['messages']
        if self.system:
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def take_action(self, state: AgentState):
        """Run every tool call found in the latest message.

        Returns:
            ``{'messages': [...]}`` holding one ``ToolMessage`` per tool call,
            each with the JSON-encoded tool result as its content.
        """
        results = []
        for call in state['messages'][-1].tool_calls:
            name = call['name']
            logger.info("Calling: %s", call)
            if name not in self.tools:
                logger.warning("....bad tool name....")
                result = "bad tool name, retry"
            else:
                result = self._run_tool(name, call.get('args') or {})
            results.append(ToolMessage(tool_call_id=call['id'], name=name, content=json.dumps(result)))
        logger.info("Back to the model!")
        return {'messages': results}

    def _run_tool(self, name, args):
        """Invoke one registered tool, supplying its MongoDB collection if it needs one."""
        # The two search tools receive their collection from this module; only
        # the model's ``query`` argument is forwarded to them.
        injected = {
            'properties_vector_search': ('properties_collection', properties_collection),
            'companies_vector_search': ('companies_collection', companies_collection),
        }
        if name not in injected:
            return self.tools[name].invoke(args)
        if 'query' not in args:
            # Give the model a chance to correct itself instead of letting a
            # KeyError abort the whole graph run.
            return "missing 'query' argument, retry"
        key, collection = injected[name]
        return self.tools[name].invoke({'query': args['query'], key: collection})
205
+
206
+ # Initialize LLM and Agent
207
# Gemini chat model used by the agent.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    google_api_key=GEMINI_API_KEY,
    temperature=0.7,
)

# Tools the model may call, and the agent that wires everything together.
tools = [properties_vector_search, companies_vector_search]
agent = Agent(llm, tools, checkpointer, system_prompt)
214
+
215
+ # Run agent
216
async def run_agent(query: str, thread_id: str | None = None) -> str:
    """Run the agent on one user query and return the text of its final reply.

    Args:
        query: The user's message.
        thread_id: Conversation key for the graph's checkpointer. When omitted
            a fresh random id is used, so the call starts with no history.

    Returns:
        The ``content`` of the last message in the resulting state, or an
        apology string containing the error text if the run raised.
    """
    import uuid  # local import: only needed to mint a throwaway thread id

    state = {"messages": [HumanMessage(content=query)]}
    # The graph is compiled with a checkpointer, and a checkpointed graph
    # needs a thread_id in its config; invoking without one fails.
    config = {"configurable": {"thread_id": thread_id or uuid.uuid4().hex}}
    try:
        result = await agent.graph.ainvoke(state, config)
        return result["messages"][-1].content
    except Exception as e:
        logger.exception("Agent execution error: %s", e)
        return f"Sorry, an error occurred: {str(e)}"
__pycache__/GeminiAgent.cpython-311.pyc ADDED
Binary file (14.2 kB). View file
 
__pycache__/main.cpython-311.pyc ADDED
Binary file (655 Bytes). View file
 
__pycache__/routes.cpython-311.pyc ADDED
Binary file (4.79 kB). View file
 
__pycache__/serialization.cpython-311.pyc ADDED
Binary file (1.37 kB). View file
 
__pycache__/tool.cpython-311.pyc ADDED
Binary file (6.72 kB). View file
 
main.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from routes import router
4
+
5
app = FastAPI()

# allow_origins is a sequence of origins, so the wildcard is given as a
# one-element list rather than a bare string.
# NOTE(review): a wildcard origin combined with allow_credentials=True lets any
# site send credentialed requests; list the trusted origins explicitly if the
# API ever relies on cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(router)  # Mount router without prefix
requirements.txt ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.9.0
3
+ asttokens==3.0.0
4
+ cachetools==5.5.2
5
+ certifi==2025.4.26
6
+ charset-normalizer==3.4.1
7
+ click==8.1.8
8
+ comm==0.2.2
9
+ debugpy==1.8.14
10
+ decorator==5.2.1
11
+ distro==1.9.0
12
+ dnspython==2.7.0
13
+ einops==0.8.1
14
+ email_validator==2.2.0
15
+ executing==2.2.0
16
+ fastapi==0.115.12
17
+ fastapi-cli==0.0.7
18
+ filelock==3.18.0
19
+ filetype==1.2.0
20
+ fsspec==2025.3.2
21
+ google-ai-generativelanguage==0.6.18
22
+ google-api-core==2.24.2
23
+ google-auth==2.39.0
24
+ googleapis-common-protos==1.70.0
25
+ greenlet==3.2.1
26
+ groq==0.24.0
27
+ grpcio==1.71.0
28
+ grpcio-status==1.71.0
29
+ h11==0.16.0
30
+ hf-xet==1.1.0
31
+ httpcore==1.0.9
32
+ httptools==0.6.4
33
+ httpx==0.28.1
34
+ huggingface-hub==0.30.2
35
+ idna==3.10
36
+ ipykernel==6.29.5
37
+ ipython==9.2.0
38
+ ipython_pygments_lexers==1.1.1
39
+ jedi==0.19.2
40
+ Jinja2==3.1.6
41
+ joblib==1.4.2
42
+ jsonpatch==1.33
43
+ jsonpointer==3.0.0
44
+ jupyter_client==8.6.3
45
+ jupyter_core==5.7.2
46
+ langchain==0.3.24
47
+ langchain-core==0.3.57
48
+ langchain-google-genai==2.1.4
49
+ langchain-groq==0.3.2
50
+ langchain-huggingface==0.1.2
51
+ langchain-mongodb==0.6.1
52
+ langchain-text-splitters==0.3.8
53
+ langgraph==0.4.1
54
+ langgraph-checkpoint==2.0.25
55
+ langgraph-checkpoint-mongodb==0.1.3
56
+ langgraph-prebuilt==0.1.8
57
+ langgraph-sdk==0.1.66
58
+ langsmith==0.3.40
59
+ lark==1.2.2
60
+ markdown-it-py==3.0.0
61
+ MarkupSafe==3.0.2
62
+ matplotlib-inline==0.1.7
63
+ mdurl==0.1.2
64
+ motor==3.7.0
65
+ mpmath==1.3.0
66
+ nest-asyncio==1.6.0
67
+ networkx==3.4.2
68
+ numpy==2.2.5
69
+ nvidia-cublas-cu12==12.6.4.1
70
+ nvidia-cuda-cupti-cu12==12.6.80
71
+ nvidia-cuda-nvrtc-cu12==12.6.77
72
+ nvidia-cuda-runtime-cu12==12.6.77
73
+ nvidia-cudnn-cu12==9.5.1.17
74
+ nvidia-cufft-cu12==11.3.0.4
75
+ nvidia-cufile-cu12==1.11.1.6
76
+ nvidia-curand-cu12==10.3.7.77
77
+ nvidia-cusolver-cu12==11.7.1.2
78
+ nvidia-cusparse-cu12==12.5.4.2
79
+ nvidia-cusparselt-cu12==0.6.3
80
+ nvidia-nccl-cu12==2.26.2
81
+ nvidia-nvjitlink-cu12==12.6.85
82
+ nvidia-nvtx-cu12==12.6.77
83
+ orjson==3.10.18
84
+ ormsgpack==1.9.1
85
+ packaging==24.2
86
+ pandas==2.2.3
87
+ parso==0.8.4
88
+ pexpect==4.9.0
89
+ pillow==11.2.1
90
+ platformdirs==4.3.7
91
+ prompt_toolkit==3.0.51
92
+ proto-plus==1.26.1
93
+ protobuf==5.29.4
94
+ psutil==7.0.0
95
+ ptyprocess==0.7.0
96
+ pure_eval==0.2.3
97
+ pyasn1==0.6.1
98
+ pyasn1_modules==0.4.2
99
+ pydantic==2.11.4
100
+ pydantic_core==2.33.2
101
+ Pygments==2.19.1
102
+ pygraphviz==1.14
103
+ pymongo==4.11.3
104
+ python-dateutil==2.9.0.post0
105
+ python-dotenv==1.1.0
106
+ python-multipart==0.0.20
107
+ pytz==2025.2
108
+ PyYAML==6.0.2
109
+ pyzmq==26.4.0
110
+ regex==2024.11.6
111
+ requests==2.32.3
112
+ requests-toolbelt==1.0.0
113
+ rich==14.0.0
114
+ rich-toolkit==0.14.4
115
+ rsa==4.9.1
116
+ safetensors==0.5.3
117
+ scikit-learn==1.6.1
118
+ scipy==1.15.2
119
+ sentence-transformers==4.1.0
120
+ shellingham==1.5.4
121
+ six==1.17.0
122
+ sniffio==1.3.1
123
+ SQLAlchemy==2.0.40
124
+ stack-data==0.6.3
125
+ starlette==0.46.2
126
+ sympy==1.14.0
127
+ tenacity==9.1.2
128
+ threadpoolctl==3.6.0
129
+ tokenizers==0.21.1
130
+ torch==2.7.0
131
+ tornado==6.4.2
132
+ tqdm==4.67.1
133
+ traitlets==5.14.3
134
+ transformers==4.51.3
135
+ triton==3.3.0
136
+ typer==0.15.3
137
+ typing-inspection==0.4.0
138
+ typing_extensions==4.13.2
139
+ tzdata==2025.2
140
+ urllib3==2.4.0
141
+ uvicorn==0.34.2
142
+ uvloop==0.21.0
143
+ watchfiles==1.0.5
144
+ wcwidth==0.2.13
145
+ websockets==15.0.1
146
+ xxhash==3.5.0
147
+ zstandard==0.23.0
routes.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Body, Request, Response, HTTPException, status
2
+ import asyncio
3
+ import logging
4
+ import json
5
+ from GeminiAgent import agent,properties_collection
6
+ from langchain_core.messages import HumanMessage
7
+ from pydantic import BaseModel
8
+ from typing import Any
9
+ from tool import get_properties_by_context
10
+
11
+ # Set up logging
12
# Logging for the route handlers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Router carrying the endpoints defined below.
router = APIRouter()
16
class QueryRequest(BaseModel):
    # Request body for POST /chatbot.
    query: Any  # user message; becomes the content of a HumanMessage
    thread_id: str  # conversation key placed in the agent's run config
19
+
20
@router.post("/chatbot", response_description="Chatbot response", status_code=status.HTTP_200_OK)
async def chatbot_response(request: Request, response: Response, body: QueryRequest):
    """
    Handles the chatbot response.
    """
    # Returns {"response": <agent reply>}.
    # Raises HTTPException 400 when the query is empty and 500 when running
    # the agent fails unexpectedly.
    query = body.query
    thread_id = body.thread_id
    # Validate before entering the try block so the 400 reaches the client
    # instead of being caught below and reported as a 500.
    if not query:
        raise HTTPException(status_code=400, detail="Query is required")

    try:
        result = await run_agent(query, thread_id=thread_id)
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.exception("Error in chatbot_response: %s", e)
        raise HTTPException(status_code=500, detail="Internal Server Error") from e

    return {"response": result}
40
+
41
async def run_agent(query: str, thread_id: str) -> str:
    """Run the agent for one message within a conversation thread.

    Args:
        query: The user's message.
        thread_id: Conversation key; placed under ``configurable.thread_id``
            in the config passed to the graph.

    Returns:
        The ``content`` of the last message in the resulting state, or an
        apology string containing the error text if the run raised.
    """
    state = {"messages": [HumanMessage(content=query)]}
    config = {"configurable": {"thread_id": thread_id}}
    try:
        result = await agent.graph.ainvoke(state, config)
        return result["messages"][-1].content
    except Exception as e:
        # Nothing from the failed run can be inspected here: ``result`` is
        # unbound whenever ``ainvoke`` itself raised, so just report the error.
        logger.exception("Agent execution error: %s", e)
        return f"Sorry, an error occurred: {str(e)}"
56
class PropertiesRequest(BaseModel):
    # Request body for POST /properties-by-context.
    query: str  # free-text description used for the vector search
58
@router.post("/properties-by-context", response_description="Get properties", status_code=status.HTTP_200_OK)
async def get_properties(request: Request, response: Response, body: PropertiesRequest):
    """
    Handles the get properties request.
    """
    # Returns {"response": <list of matching properties>}.
    # Raises HTTPException 400 when the query is empty and 500 when the
    # lookup fails unexpectedly.
    query = body.query
    # Validate before entering the try block so the 400 reaches the client
    # instead of being caught below and reported as a 500.
    if not query:
        raise HTTPException(status_code=400, detail="Query is required")

    try:
        result = await get_properties_by_context(query, properties_collection)
    except HTTPException:
        # Deliberate HTTP errors pass through untouched.
        raise
    except Exception as e:
        logger.exception("Error in get_properties: %s", e)
        raise HTTPException(status_code=500, detail="Internal Server Error") from e

    return {"response": result}
serialization.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bson import ObjectId
2
+ from datetime import datetime
3
+
4
def convert_to_serializable(obj):
    """Recursively replace BSON-specific values with JSON-friendly ones.

    ``ObjectId`` becomes its string form and ``datetime`` its ISO-8601 string.
    Dicts and lists are rebuilt with their contents converted the same way;
    any other value is returned unchanged.
    """
    # Containers first: walk into them and convert whatever they hold.
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return list(map(convert_to_serializable, obj))
    # Leaf values that json cannot encode directly.
    if isinstance(obj, ObjectId):
        return str(obj)
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj
tool.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from langchain_core.tools import tool
3
+ from langchain_core.documents import Document
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from serialization import convert_to_serializable
6
+ from typing import List
7
+ # Set up logging
8
# Logging for the search helpers.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Embedding model used to vectorise every search query.
embedmodel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
13
+
14
+ # Raw vector search function
15
def raw_vector_search(collection, query: str, index_name: str, k: int = 10) -> List[Document]:
    """Run a ``$vectorSearch`` aggregation and wrap each hit in a ``Document``.

    Args:
        collection: Object exposing ``aggregate(pipeline)``.
        query: Text to embed with ``embedmodel``.
        index_name: Name of the vector index to search.
        k: Maximum number of hits; ten times as many candidates are examined.

    Returns:
        One ``Document`` per hit, with the hit's ``description`` field (empty
        string if absent) as page content and every other field, including
        ``score``, as metadata. An empty list if anything raises; the error
        is logged.
    """
    try:
        search_stage = {
            "$vectorSearch": {
                "index": index_name,
                "path": "revoemb",
                "queryVector": embedmodel.embed_query(query),
                "numCandidates": k * 10,
                "limit": k,
            }
        }
        # Leave the stored embedding out of the output and add the similarity score.
        project_stage = {
            "$project": {
                "revoemb": 0,
                "score": {"$meta": "vectorSearchScore"},
            }
        }
        documents = []
        for hit in collection.aggregate([search_stage, project_stage]):
            metadata = {field: value for field, value in hit.items() if field != "description"}
            documents.append(
                Document(
                    page_content=hit.get("description", ""),
                    metadata=metadata,
                    score=hit.get("score", 0),
                )
            )
        return documents
    except Exception as e:
        logger.error("Vector search error: %s", str(e))
        return []
44
+
45
+ # Define tools
46
@tool
def properties_vector_search(query: str, properties_collection=None) -> List[dict]:
    """Search for real estate properties based on a query."""
    # The docstring above is kept verbatim: the @tool decorator uses it as the
    # description of this tool.
    # Returns one dict per hit with "content", "metadata" (made serializable)
    # and "score"; returns [] when the collection is missing or the search fails.
    try:
        if properties_collection is None:
            raise ValueError("Properties collection not provided")
        hits = raw_vector_search(properties_collection, query, "properties_vector_index")
        logger.info("Properties query: %s, results: %d", query, len(hits))
        payload = []
        for hit in hits:
            payload.append(
                {
                    "content": hit.page_content,
                    "metadata": convert_to_serializable(hit.metadata),
                    "score": hit.metadata.get("score", 0),
                }
            )
        return payload
    except Exception as e:
        logger.error("Properties search error: %s", str(e))
        return []
65
+
66
@tool
def companies_vector_search(query: str, companies_collection=None) -> List[dict]:
    """Search for real estate companies based on a query."""
    # The docstring above is kept verbatim: the @tool decorator uses it as the
    # description of this tool.
    # Returns one dict per hit with "content", "metadata" (made serializable)
    # and "score"; returns [] when the collection is missing or the search fails.
    try:
        if companies_collection is None:
            raise ValueError("Companies collection not provided")
        hits = raw_vector_search(companies_collection, query, "companies_vector_index")
        logger.info("Companies query: %s, results: %d", query, len(hits))
        payload = []
        for hit in hits:
            payload.append(
                {
                    "content": hit.page_content,
                    "metadata": convert_to_serializable(hit.metadata),
                    "score": hit.metadata.get("score", 0),
                }
            )
        return payload
    except Exception as e:
        logger.error("Companies search error: %s", str(e))
        return []
85
async def get_properties_by_context(query: str, properties_collection=None) -> List[dict]:
    """Return up to ten properties most similar to ``query`` as JSON-friendly dicts.

    Args:
        query: Free text, embedded with ``embedmodel`` for the vector search.
        properties_collection: Collection to search; required despite the
            ``None`` default.

    Returns:
        The matching documents without their ``revoemb`` embedding and with a
        ``score`` field added, passed through ``convert_to_serializable``.
        An empty list if the collection is missing or the search fails; the
        error is logged.
    """
    # NOTE(review): the embedding and aggregate calls below are synchronous, so
    # they block the event loop while they run — confirm this is acceptable.
    try:
        if properties_collection is None:
            raise ValueError("Properties collection not provided")
        query_embedding = embedmodel.embed_query(query)
        pipeline = [
            {
                "$vectorSearch": {
                    "index": "properties_vector_index",
                    "path": "revoemb",
                    "queryVector": query_embedding,
                    "numCandidates": 100,
                    "limit": 10,
                }
            },
            {
                "$project": {
                    "revoemb": 0,
                    "score": {"$meta": "vectorSearchScore"},
                }
            },
        ]
        # Use the shared converter rather than a fixed list of field names, so
        # ObjectId/datetime values in any field, nested or not, are converted.
        results = [
            convert_to_serializable(doc)
            for doc in properties_collection.aggregate(pipeline)
        ]
        logger.info("Properties by context query: %s, results: %d", query, len(results))
        return results
    except Exception as e:
        logger.error("Properties by context error: %s", str(e))
        return []