File size: 6,345 Bytes
abc7a1c
 
 
 
 
 
 
 
 
 
 
 
 
03197ad
abc7a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
Module: vector_database
Description: Handles storing and searching real estate listings using ChromaDB and OpenAI embeddings.
"""

import json
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

class VectorDatabase:
    """Handles vector-based storage and retrieval of real estate listings using ChromaDB."""

    def __init__(self, listings_path="Data/listings.json", db_path="Data/chroma_langchain_db"):
        """
        Initializes the vector store by loading real estate listings and setting up ChromaDB.

        Args:
            listings_path (str): Path to the JSON file containing real estate listings.
            db_path (str): Path to the directory where ChromaDB stores embeddings.
        """
        self.listings_path = listings_path
        self.db_path = db_path
        self.embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

        # Load and process listings
        self.listings = self._load_listings()
        self.documents = self._prepare_documents()

        # Initialize ChromaDB for storage
        self.vector_store = Chroma(
            collection_name="real_estate_listings",
            embedding_function=self.embedding_model,
            persist_directory=self.db_path
        )

    def _load_listings(self):
        """
        Loads real estate listings from a JSON file.

        Returns:
            list: A list of real estate listings.
        """
        try:
            with open(self.listings_path, "r") as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            print(f"❌ Error loading listings file: {e}")
            return []

    def _prepare_documents(self):
        """
        Converts listings into Document objects with metadata.

        Returns:
            list: A list of Document objects with structured metadata.
        """
        return [
            Document(
                page_content=listing["Description"],  # Store property description
                metadata={
                    "id": listing["id"],  
                    "property_type": listing["Property Type"],
                    "neighborhood": listing["Neighborhood"],
                    "city": listing["City"],
                    "state": listing["State"],
                    "price": listing["Price"],
                    "house_size": listing["House Size"],
                    "bedrooms": listing["Bedrooms"],
                    "bathrooms": listing["Bathrooms"],
                    "neighborhood_description": listing["Neighborhood Description"],
                    "image_path": listing["image_path"]
                }
            )
            for listing in self.listings
        ]

    def store_listings(self):
        """
        Stores real estate listings in ChromaDB.
        """
        try:
            self.vector_store.add_documents(self.documents)
            print("βœ… Listings successfully stored in ChromaDB!")
        except Exception as e:
            print(f"❌ Error storing listings: {e}")


    def format_user_prefs(self, user_prefs):
        """
        Converts structured user preferences into a readable search query.

        Args:
            user_prefs (dict): Dictionary containing user preferences.

        Returns:
            str: A natural language query string for embedding.
        """
        try:
            return (
                f"Looking for a property in {', '.join(user_prefs.get('city', []))}, {', '.join(user_prefs.get('state', []))}. "
                f"House size preference: {user_prefs.get('house_size', 'any size')}. "
                f"Maximum price: {user_prefs.get('max_price', '100000')}. "
                f"Number of Bedrooms: {user_prefs.get('num_bedrooms', 3)}. "
                f"Number of Bathrooms: {user_prefs.get('num_bathrooms', 3)}. "
                f"Amenities: {', '.join(user_prefs.get('amenities', []))}. "
                f"Property description: {user_prefs.get('description', 'no preference')}."
            )
        except Exception as e:
            print(f"❌ Error formatting user preferences: {e}")
            return ""

    def search(self, user_prefs, k=5):
        """
        Performs a similarity search based on user preferences and retrieves matching listings with images.

        Args:
            user_prefs (dict): Dictionary containing user search preferences.
            k (int): Number of top matches to return.

        Returns:
            list: A list of dictionaries containing listing details and image paths.
        """
        try:
            # Convert user preferences into a natural language query
            query = self.format_user_prefs(user_prefs)

            # Generate embeddings for the query
            query_embedding = self.embedding_model.embed_query(query)

            if not isinstance(query_embedding, list):
                raise ValueError("❌ Embedding function did not return a valid vector list.")

            # Perform similarity search using the embedding
            results = self.vector_store.similarity_search_by_vector(query_embedding, k=k)

            # Extract relevant metadata, including image paths
            listings_with_images = [
                {
                    "description": doc.page_content,
                    "id": doc.metadata.get("id"),
                    "city": doc.metadata.get("city", "Unknown"),
                    "state": doc.metadata.get("state", "Unknown"),
                    "price": doc.metadata.get("price", "N/A"),
                    "bedrooms": doc.metadata.get("bedrooms", "N/A"),
                    "bathrooms": doc.metadata.get("bathrooms", "N/A"),
                    "house_size": doc.metadata.get("house_size", "N/A"),
                    "neighborhood": doc.metadata.get("neighborhood", "Unknown"),
                    "neighborhood_description": doc.metadata.get("neighborhood_description", ""),
                    "image_path": doc.metadata.get("image_path", "❌ No image available")  # Ensure image path is included
                }
                for doc in results
            ]

            return listings_with_images

        except Exception as e:
            print(f"❌ Error during search: {e}")
            return []