Spaces:

jibrla
/

Revochatbot

Runtime error

App Files Files Community

GitHub Actions committed on May 5, 2025

Commit

56bf5b7

1 Parent(s): 55ab1b4

Deploy chatbot from GitHub Actions

Browse files

Files changed (12) hide show

Dockerfile +8 -0
GeminiAgent.py +228 -0
__pycache__/GeminiAgent.cpython-311.pyc +0 -0
__pycache__/main.cpython-311.pyc +0 -0
__pycache__/routes.cpython-311.pyc +0 -0
__pycache__/serialization.cpython-311.pyc +0 -0
__pycache__/tool.cpython-311.pyc +0 -0
main.py +15 -0
requirements.txt +147 -0
routes.py +75 -0
serialization.py +13 -0
tool.py +126 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,8 @@

+# Python 3.11 matches the committed cpython-311 bytecode and is needed by pins in
+# requirements.txt such as ipython==9.2.0, which does not install on Python 3.10.
+FROM python:3.11-slim
+WORKDIR /app
+# pygraphviz (pinned in requirements.txt) is compiled from source and needs a C
+# compiler plus the Graphviz development headers, which the slim image lacks.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends build-essential graphviz libgraphviz-dev \
+    && rm -rf /var/lib/apt/lists/*
+# Copy the dependency list on its own first so the install layers stay cached
+# when only application code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir gunicorn
+# With several sources the destination must be a directory ending in "/".
+COPY GeminiAgent.py main.py routes.py serialization.py tool.py ./
+EXPOSE 7860
+# NOTE(review): every worker imports GeminiAgent separately, so each of the 4
+# processes gets its own MemorySaver checkpointer; confirm that per-thread chat
+# history is not expected to be shared between workers.
+CMD ["gunicorn", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "main:app", "--bind", "0.0.0.0:7860"]

GeminiAgent.py ADDED Viewed

	@@ -0,0 +1,228 @@

+import asyncio
+import logging
+import os
+import json
+import operator
+from typing import TypedDict, List, Annotated
+from langchain_core.tools import tool
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langgraph.graph import StateGraph, END
+from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, ToolMessage, SystemMessage
+from dotenv import load_dotenv
+from pymongo import MongoClient
+from tool import properties_vector_search, companies_vector_search
+from langgraph.checkpoint.memory import MemorySaver
+# Checkpointer passed to graph.compile() when the Agent is built further down.
+# NOTE(review): MemorySaver is imported from langgraph.checkpoint.memory, so saved
+# state presumably lives only in this process; the Dockerfile starts 4 gunicorn
+# workers, so confirm conversation history need not be shared across workers.
+checkpointer = MemorySaver()
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Load environment variables
+load_dotenv()
+CONNECTION_STRING = os.getenv("MongoURI")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+# Fail at import time when either setting is missing or empty.
+if not GEMINI_API_KEY or not CONNECTION_STRING:
+    logger.error("Missing required environment variables: MongoURI or GEMINI_API_KEY")
+    raise ValueError("Missing required environment variables.")
+# Initialize MongoDB client with timeout settings (each 30000 ms)
+mongo_client = MongoClient(
+    CONNECTION_STRING,
+    serverSelectionTimeoutMS=30000,
+    connectTimeoutMS=30000,
+    socketTimeoutMS=30000
+)
+# Collections of the "revostate" database; routes.py imports properties_collection.
+properties_collection = mongo_client["revostate"]["properties"]
+companies_collection = mongo_client["revostate"]["companies"]
+# Verify collections: the counts are logged, and any failure is logged and
+# re-raised so the module import fails instead of continuing without a database.
+try:
+    logger.info("Properties count: %d", properties_collection.count_documents({}))
+    logger.info("Companies count: %d", companies_collection.count_documents({}))
+except Exception as e:
+    logger.error("MongoDB connection error: %s", str(e))
+    raise
+# Define agent state
+class AgentState(TypedDict):
+    """State dictionary handed to each node of the agent graph."""
+    # Annotated with operator.add: presumably LangGraph uses it as the reducer, so
+    # the messages a node returns are appended to this list — TODO confirm.
+    messages: Annotated[list[AnyMessage], operator.add]
+# Define system prompt
+system_prompt = """
+You are a knowledgeable and friendly real estate assistant specializing in properties and companies in Addis Ababa. Your goal is to provide comprehensive, tailored responses that match the user's request exactly, including relevant company or real estate agency details only when explicitly requested.
+Key Guidelines:
+1. Response Style:
+   - Use natural, conversational language while maintaining professionalism.
+   - Adapt response format based on the user's request:
+     * For "details" or specific queries (e.g., coordinates, company info), include all available metadata.
+     * For "summary" or brief info requests, provide a concise overview.
+     * Default to detailed responses unless specified otherwise.
+   - Always conclude by asking if the user needs more information or has other questions.
+2. Property Information:
+   - Prioritize key details: title, price, location.
+   - For detailed responses, include:
+     * Full address with subcity/district.
+     * Exact coordinates (latitude/longitude) when available.
+     * Specifications: bedrooms, bathrooms, area, built year, etc.
+     * Amenities, furnished status, and special features.
+     * Clear description of the property.
+   - Present information in bullet points or short paragraphs for clarity.
+   - If properties are from nearby areas, clearly state this (e.g., "This property is in Lemi Kura, near Bole").
+   - If exact address or coordinates are unavailable, note this explicitly.
+3. Company/Real Estate Agency Information:
+   - Provide company details only when the user explicitly requests information about the real estate agency or property owner (e.g., "Can I also get information about the real estate owner of the property?").
+   - When company details are requested:
+     * Use the `companies_vector_search` tool to retrieve information based on the `companyId` referenced in the property data.
+     * Include:
+       - Company name, services offered, and contact details (phone, email, website).
+       - Full address and years in operation (if available).
+       - Specializations or notable projects.
+     * If `companies_vector_search` returns no results, state: "Company details are not available for this listing. Please contact the listing platform for more information."
+   - Do not fetch or include company details unless explicitly requested in the query.
+4. Query Handling:
+   - For location-based queries (e.g., "Yeka subcity"), only include properties/companies in that area unless none are found, then mention nearby areas.
+   - When coordinates are requested, present them prominently.
+   - If the user asks about properties without mentioning the company or real estate agency (e.g., "Properties in Bole"), provide only property details based on the query.
+   - If the user asks for company or real estate details (e.g., "Tell me the address of the real estate that created these properties"), retrieve and include company details using the `companyId` from the property data.
+   - Ensure responses are accurate and avoid fabricating unavailable data.
+5. Example Responses:
+   **Property-Only Query:**
+   User Query: "Can I get a 3-Bedroom Apartment for Sale in Bole?"
+   Response:
+   "I found a 3-bedroom apartment for sale in Bole Subcity:
+   - **Title**: 3bdrm Apartment in Bole for sale
+   - **Price**: 17,000,000 ETB
+   - **Location**: Near Bole International Airport, Bole Subcity
+   - **Specifications**:
+     * Bedrooms: 3
+     * Bathrooms: 2
+     * Area: 167 sqm
+     * Built: 2018
+   - **Features**: Furnished, flexible payment plan (15% down payment)
+   - **Description**: Enjoy a spacious, modern apartment with premium amenities near the airport.
+   Do you need more details or other listings?"
+   **Property and Company Query:**
+   User Query: "Can I get a 3-Bedroom Apartment for Sale in Bole? Can I also get information about the real estate owner of the property?"
+   Response:
+   "I found a 3-bedroom apartment for sale in Bole Subcity, along with details of the real estate company that listed it:
+   **Property Details:**
+   - **Title**: 3bdrm Apartment in Bole for sale
+   - **Price**: 17,000,000 ETB
+   - **Location**: Near Bole International Airport, Bole Subcity
+   - **Specifications**:
+     * Bedrooms: 3
+     * Bathrooms: 2
+     * Area: 167 sqm
+     * Built: 2018
+   - **Features**: Furnished, flexible payment plan (15% down payment)
+   - **Description**: Enjoy a spacious, modern apartment with premium amenities near the airport.
+   **Real Estate Company Details:**
+   - **Name**: Ayat Real Estate
+   - **Services**: Specializes in premium residential and commercial properties
+   - **Address**: [Insert full address from companies_vector_search]
+   - **Contact**:
+     * Phone: +251 969 60 60 60
+     * Email: jibrilarbicho185@gmail.com
+   - **Description**: Ayat Real Estate is known for high-quality developments in Addis Ababa.
+   Do you need more details about this property, other listings, or additional company information?"
+6. Tool Usage:
+   - Call `companies_vector_search` only when the user explicitly requests company or real estate agency details, using the `companyId` from each property's data.
+   - For queries involving multiple properties with company details requested, call the tool for each unique `companyId`.
+   - Do not call `companies_vector_search` for queries that only ask for property details (e.g., "Properties in Bole").
+   - If `companies_vector_search` is called and returns no results, state: "Company details are not available for this listing. Please contact the listing platform for more information."
+7. Data Integrity:
+   - Use property data fields (e.g., `companyId`, `address`, `price`) accurately.
+   - For missing data (e.g., address, coordinates), indicate: "Specific [field] is unavailable for this listing."
+   - Ensure company details, when requested, align with the property's `companyId`.
+This prompt ensures accurate, query-specific responses, fetching company details via `companies_vector_search` only when explicitly requested, while providing property details for all relevant queries.
+"""
+# Define Agent class
+class Agent:
+    def __init__(self, model, tools,checkpointer, system=""):
+        self.system = system
+        graph = StateGraph(AgentState)
+        graph.add_node("llm", self.call_gemini)
+        graph.add_node("action", self.take_action)
+        graph.add_conditional_edges(
+            "llm",
+            self.exists_action,
+            {True: "action", False: END}
+        )
+        graph.set_entry_point("llm")
+        graph.add_edge("action", "llm")
+        self.tools = {t.name: t for t in tools}
+        self.model = model.bind_tools(tools)
+        self.graph = graph.compile(checkpointer=checkpointer)
+    def exists_action(self, state: AgentState):
+        result = state['messages'][-1]
+        return len(result.tool_calls) > 0
+    def call_gemini(self, state: AgentState):
+        messages = state['messages']
+        if self.system:
+            messages = [SystemMessage(content=self.system)] + messages
+        message = self.model.invoke(messages)
+        return {'messages': [message]}
+# take action
+    def take_action(self, state: AgentState):
+        tool_calls = state['messages'][-1].tool_calls
+        results = []
+        for t in tool_calls:
+            print(f"Calling: {t}")
+            if t['name'] not in self.tools:
+                print("\n ....bad tool name....")
+                result = "bad tool name, retry"
+            else:
+                # Pass collections to tool functions
+                if t['name'] == 'properties_vector_search':
+                    result = self.tools[t['name']].invoke({'query': t['args']['query'], 'properties_collection': properties_collection})
+                elif t['name'] == 'companies_vector_search':
+                    result = self.tools[t['name']].invoke({'query': t['args']['query'], 'companies_collection': companies_collection})
+                else:
+                    result = self.tools[t['name']].invoke(t['args'])
+            # Preserve result as a dictionary for detailed formatting
+            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=json.dumps(result)))
+        print("Back to the model!")
+        return {'messages': results}
+# Initialize LLM and Agent
+llm = ChatGoogleGenerativeAI(
+    model="gemini-1.5-flash",
+    google_api_key=GEMINI_API_KEY,
+    temperature=0.7
+)
+# Both search tools are bound to the model and made callable from the graph's action node.
+tools = [properties_vector_search, companies_vector_search]
+# Module-level agent instance; routes.py imports it as `agent`.
+agent = Agent(model=llm, tools=tools, system=system_prompt, checkpointer=checkpointer)
+# Run agent
+async def run_agent(query: str) -> str:
+    state = {
+        "messages": [HumanMessage(content=query)]
+    }
+    try:
+        result = await agent.graph.ainvoke(state)
+        last_message = result["messages"][-1].content
+        return last_message
+    except Exception as e:
+        logger.error("Agent execution error: %s", str(e))
+        if "tool_results" in locals() and locals().get("tool_results"):
+            return json.dumps(locals()["tool_results"], indent=2)
+        return f"Sorry, an error occurred: {str(e)}"

__pycache__/GeminiAgent.cpython-311.pyc ADDED Viewed

Binary file (14.2 kB). View file

__pycache__/main.cpython-311.pyc ADDED Viewed

Binary file (655 Bytes). View file

__pycache__/routes.cpython-311.pyc ADDED Viewed

Binary file (4.79 kB). View file

__pycache__/serialization.cpython-311.pyc ADDED Viewed

Binary file (1.37 kB). View file

__pycache__/tool.cpython-311.pyc ADDED Viewed

Binary file (6.72 kB). View file

main.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from routes import router
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins="*",
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+app.include_router(router)  # Mount router without prefix

requirements.txt ADDED Viewed

	@@ -0,0 +1,147 @@

+annotated-types==0.7.0
+anyio==4.9.0
+asttokens==3.0.0
+cachetools==5.5.2
+certifi==2025.4.26
+charset-normalizer==3.4.1
+click==8.1.8
+comm==0.2.2
+debugpy==1.8.14
+decorator==5.2.1
+distro==1.9.0
+dnspython==2.7.0
+einops==0.8.1
+email_validator==2.2.0
+executing==2.2.0
+fastapi==0.115.12
+fastapi-cli==0.0.7
+filelock==3.18.0
+filetype==1.2.0
+fsspec==2025.3.2
+google-ai-generativelanguage==0.6.18
+google-api-core==2.24.2
+google-auth==2.39.0
+googleapis-common-protos==1.70.0
+greenlet==3.2.1
+groq==0.24.0
+grpcio==1.71.0
+grpcio-status==1.71.0
+h11==0.16.0
+hf-xet==1.1.0
+httpcore==1.0.9
+httptools==0.6.4
+httpx==0.28.1
+huggingface-hub==0.30.2
+idna==3.10
+ipykernel==6.29.5
+ipython==9.2.0
+ipython_pygments_lexers==1.1.1
+jedi==0.19.2
+Jinja2==3.1.6
+joblib==1.4.2
+jsonpatch==1.33
+jsonpointer==3.0.0
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+langchain==0.3.24
+langchain-core==0.3.57
+langchain-google-genai==2.1.4
+langchain-groq==0.3.2
+langchain-huggingface==0.1.2
+langchain-mongodb==0.6.1
+langchain-text-splitters==0.3.8
+langgraph==0.4.1
+langgraph-checkpoint==2.0.25
+langgraph-checkpoint-mongodb==0.1.3
+langgraph-prebuilt==0.1.8
+langgraph-sdk==0.1.66
+langsmith==0.3.40
+lark==1.2.2
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+motor==3.7.0
+mpmath==1.3.0
+nest-asyncio==1.6.0
+networkx==3.4.2
+numpy==2.2.5
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
+orjson==3.10.18
+ormsgpack==1.9.1
+packaging==24.2
+pandas==2.2.3
+parso==0.8.4
+pexpect==4.9.0
+pillow==11.2.1
+platformdirs==4.3.7
+prompt_toolkit==3.0.51
+proto-plus==1.26.1
+protobuf==5.29.4
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pydantic==2.11.4
+pydantic_core==2.33.2
+Pygments==2.19.1
+pygraphviz==1.14
+pymongo==4.11.3
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.0
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+pyzmq==26.4.0
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+rich==14.0.0
+rich-toolkit==0.14.4
+rsa==4.9.1
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.15.2
+sentence-transformers==4.1.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+SQLAlchemy==2.0.40
+stack-data==0.6.3
+starlette==0.46.2
+sympy==1.14.0
+tenacity==9.1.2
+threadpoolctl==3.6.0
+tokenizers==0.21.1
+torch==2.7.0
+tornado==6.4.2
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.51.3
+triton==3.3.0
+typer==0.15.3
+typing-inspection==0.4.0
+typing_extensions==4.13.2
+tzdata==2025.2
+urllib3==2.4.0
+uvicorn==0.34.2
+uvloop==0.21.0
+watchfiles==1.0.5
+wcwidth==0.2.13
+websockets==15.0.1
+xxhash==3.5.0
+zstandard==0.23.0

routes.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from fastapi import APIRouter, Body, Request, Response, HTTPException, status
+import asyncio
+import logging
+import json
+from GeminiAgent import agent,properties_collection
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel
+from typing import Any
+from tool import get_properties_by_context
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Router holding the endpoints defined in this module.
+router = APIRouter()
+# Request body for POST /chatbot.
+class QueryRequest(BaseModel):
+    # Typed Any here, although run_agent annotates it as str and wraps it in a HumanMessage.
+    query: Any
+    # Forwarded to the graph config as configurable.thread_id by run_agent.
+    thread_id: str
+@router.post("/chatbot", response_description="Chatbot response", status_code=status.HTTP_200_OK)
+async def chatbot_response(request: Request, response: Response, body: QueryRequest):
+    """
+    Handles the chatbot response.
+    """
+    try:
+        # Extract the query from the request body
+        query = body.query
+        thread_id=body.thread_id
+        if not query:
+            raise HTTPException(status_code=400, detail="Query is required")
+        # Run the agent with the provided query
+        result = await run_agent(query, thread_id=thread_id)
+        # Return the result
+        return {"response": result}
+    except Exception as e:
+        logger.error("Error in chatbot_response: %s", str(e))
+        raise HTTPException(status_code=500, detail="Internal Server Error")
+async def run_agent(query: str,thread_id:str) -> str:
+    state = {
+        "messages": [HumanMessage(content=query)]
+    }
+    try:
+        config = {"configurable": {"thread_id": thread_id}}
+        result = await agent.graph.ainvoke(state, config)
+        last_message = result["messages"][-1].content
+        return last_message
+    except Exception as e:
+        logger.error("Agent execution error: %s", str(e))
+        if "tool_results" in result and result.get("tool_results"):
+            return json.dumps(result["tool_results"], indent=2)
+        return f"Sorry, an error occurred: {str(e)}"
+class PropertiesRequest(BaseModel):
+    query: str
+@router.post("/properties-by-context", response_description="Get properties", status_code=status.HTTP_200_OK)
+async def get_properties(request: Request, response: Response, body: PropertiesRequest):
+    """
+    Handles the get properties request.
+    """
+    try:
+        # Extract the query from the request body
+        query = body.query
+        if not query:
+            raise HTTPException(status_code=400, detail="Query is required")
+        result = await get_properties_by_context(query,properties_collection)
+        # Return the result
+        return {"response": result}
+    except Exception as e:
+        logger.error("Error in get_properties: %s", str(e))
+        raise HTTPException(status_code=500, detail="Internal Server Error")

serialization.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from bson import ObjectId
+from datetime import datetime
+def convert_to_serializable(obj):
+    if isinstance(obj, ObjectId):
+        return str(obj)
+    if isinstance(obj, datetime):
+        return obj.isoformat()
+    if isinstance(obj, dict):
+        return {k: convert_to_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [convert_to_serializable(item) for item in obj]
+    return obj

tool.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import logging
+from langchain_core.tools import tool
+from langchain_core.documents import Document
+from langchain_huggingface import HuggingFaceEmbeddings
+from serialization import convert_to_serializable
+from typing import List
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize embeddings once at import; embedmodel.embed_query() produces the
+# query vector for every search in this module.
+embedmodel = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+# Raw vector search function
+def raw_vector_search(collection, query: str, index_name: str, k: int = 10) -> List[Document]:
+    try:
+        query_embedding = embedmodel.embed_query(query)
+        pipeline = [
+            {
+                "$vectorSearch": {
+                    "index": index_name,
+                    "path": "revoemb",
+                    "queryVector": query_embedding,
+                    "numCandidates": k * 10,
+                    "limit": k
+                }
+            },
+            {
+                "$project": {
+                    "revoemb": 0,
+                    "score": {"$meta": "vectorSearchScore"}
+                }
+            }
+        ]
+        results = list(collection.aggregate(pipeline))
+        return [Document(
+            page_content=r.get("description", ""),
+            metadata={k: v for k, v in r.items() if k != "description"},
+            score=r.get("score", 0)
+        ) for r in results]
+    except Exception as e:
+        logger.error("Vector search error: %s", str(e))
+        return []
+# Define tools
+@tool
+def properties_vector_search(query: str, properties_collection=None) -> List[dict]:
+    """Search for real estate properties based on a query."""
+    try:
+        if properties_collection is None:
+            raise ValueError("Properties collection not provided")
+        results = raw_vector_search(properties_collection, query, "properties_vector_index")
+        logger.info("Properties query: %s, results: %d", query, len(results))
+        return [
+            {
+                "content": r.page_content,
+                "metadata": convert_to_serializable(r.metadata),
+                "score": r.metadata.get("score", 0)
+            }
+            for r in results
+        ]
+    except Exception as e:
+        logger.error("Properties search error: %s", str(e))
+        return []
+@tool
+def companies_vector_search(query: str, companies_collection=None) -> List[dict]:
+    """Search for real estate companies based on a query."""
+    try:
+        if companies_collection is None:
+            raise ValueError("Companies collection not provided")
+        results = raw_vector_search(companies_collection, query, "companies_vector_index")
+        logger.info("Companies query: %s, results: %d", query, len(results))
+        return [
+            {
+                "content": r.page_content,
+                "metadata": convert_to_serializable(r.metadata),
+                "score": r.metadata.get("score", 0)
+            }
+            for r in results
+        ]
+    except Exception as e:
+        logger.error("Companies search error: %s", str(e))
+        return []
+async def get_properties_by_context(query: str, properties_collection=None) -> List[dict]:
+    """Get properties by context."""
+    try:
+        if properties_collection is None:
+            raise ValueError("Properties collection not provided")
+        query_embedding = embedmodel.embed_query(query)
+        pipeline = [
+            {
+                "$vectorSearch": {
+                    "index": "properties_vector_index",
+                    "path": "revoemb",
+                    "queryVector": query_embedding,
+                    "numCandidates":  100,
+                    "limit": 10
+                }
+            },
+            {
+                "$project": {
+                    "revoemb": 0,
+                    "score": {"$meta": "vectorSearchScore"}
+                }
+            }
+        ]
+        results = list(properties_collection.aggregate(pipeline))
+        # Convert ObjectId fields to strings
+        for result in results:
+            if '_id' in result:
+                result['_id'] = str(result['_id'])
+            if 'companyId' in result:
+                result['companyId'] = str(result['companyId'])
+            if 'userId' in result:
+                result['userId'] = str(result['userId'])
+            # Add other ObjectId fields as needed (e.g., purchaseId)
+            if 'purchaseId' in result:
+                result['purchaseId'] = str(result['purchaseId'])
+        logger.info("Properties by context query: %s, results: %d", query, len(results))
+        return results
+    except Exception as e:
+        logger.error("Properties by context error: %s", str(e))
+        return []