Spaces:
Build error
Build error
| """ | |
| Module: vector_database | |
| Description: Handles storing and searching real estate listings using ChromaDB and OpenAI embeddings. | |
| """ | |
| import json | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_chroma import Chroma | |
| from langchain_core.documents import Document | |
| class VectorDatabase: | |
| """Handles vector-based storage and retrieval of real estate listings using ChromaDB.""" | |
| def __init__(self, listings_path="Data/listings.json", db_path="Data/chroma_langchain_db"): | |
| """ | |
| Initializes the vector store by loading real estate listings and setting up ChromaDB. | |
| Args: | |
| listings_path (str): Path to the JSON file containing real estate listings. | |
| db_path (str): Path to the directory where ChromaDB stores embeddings. | |
| """ | |
| self.listings_path = listings_path | |
| self.db_path = db_path | |
| self.embedding_model = OpenAIEmbeddings(model="text-embedding-3-large") | |
| # Load and process listings | |
| self.listings = self._load_listings() | |
| self.documents = self._prepare_documents() | |
| # Initialize ChromaDB for storage | |
| self.vector_store = Chroma( | |
| collection_name="real_estate_listings", | |
| embedding_function=self.embedding_model, | |
| persist_directory=self.db_path | |
| ) | |
| def _load_listings(self): | |
| """ | |
| Loads real estate listings from a JSON file. | |
| Returns: | |
| list: A list of real estate listings. | |
| """ | |
| try: | |
| with open(self.listings_path, "r") as f: | |
| return json.load(f) | |
| except (FileNotFoundError, json.JSONDecodeError) as e: | |
| print(f"β Error loading listings file: {e}") | |
| return [] | |
| def _prepare_documents(self): | |
| """ | |
| Converts listings into Document objects with metadata. | |
| Returns: | |
| list: A list of Document objects with structured metadata. | |
| """ | |
| return [ | |
| Document( | |
| page_content=listing["Description"], # Store property description | |
| metadata={ | |
| "id": listing["id"], | |
| "property_type": listing["Property Type"], | |
| "neighborhood": listing["Neighborhood"], | |
| "city": listing["City"], | |
| "state": listing["State"], | |
| "price": listing["Price"], | |
| "house_size": listing["House Size"], | |
| "bedrooms": listing["Bedrooms"], | |
| "bathrooms": listing["Bathrooms"], | |
| "neighborhood_description": listing["Neighborhood Description"], | |
| "image_path": listing["image_path"] | |
| } | |
| ) | |
| for listing in self.listings | |
| ] | |
| def store_listings(self): | |
| """ | |
| Stores real estate listings in ChromaDB. | |
| """ | |
| try: | |
| self.vector_store.add_documents(self.documents) | |
| print("β Listings successfully stored in ChromaDB!") | |
| except Exception as e: | |
| print(f"β Error storing listings: {e}") | |
| def format_user_prefs(self, user_prefs): | |
| """ | |
| Converts structured user preferences into a readable search query. | |
| Args: | |
| user_prefs (dict): Dictionary containing user preferences. | |
| Returns: | |
| str: A natural language query string for embedding. | |
| """ | |
| try: | |
| return ( | |
| f"Looking for a property in {', '.join(user_prefs.get('city', []))}, {', '.join(user_prefs.get('state', []))}. " | |
| f"House size preference: {user_prefs.get('house_size', 'any size')}. " | |
| f"Maximum price: {user_prefs.get('max_price', '100000')}. " | |
| f"Number of Bedrooms: {user_prefs.get('num_bedrooms', 3)}. " | |
| f"Number of Bathrooms: {user_prefs.get('num_bathrooms', 3)}. " | |
| f"Amenities: {', '.join(user_prefs.get('amenities', []))}. " | |
| f"Property description: {user_prefs.get('description', 'no preference')}." | |
| ) | |
| except Exception as e: | |
| print(f"β Error formatting user preferences: {e}") | |
| return "" | |
| def search(self, user_prefs, k=5): | |
| """ | |
| Performs a similarity search based on user preferences and retrieves matching listings with images. | |
| Args: | |
| user_prefs (dict): Dictionary containing user search preferences. | |
| k (int): Number of top matches to return. | |
| Returns: | |
| list: A list of dictionaries containing listing details and image paths. | |
| """ | |
| try: | |
| # Convert user preferences into a natural language query | |
| query = self.format_user_prefs(user_prefs) | |
| # Generate embeddings for the query | |
| query_embedding = self.embedding_model.embed_query(query) | |
| if not isinstance(query_embedding, list): | |
| raise ValueError("β Embedding function did not return a valid vector list.") | |
| # Perform similarity search using the embedding | |
| results = self.vector_store.similarity_search_by_vector(query_embedding, k=k) | |
| # Extract relevant metadata, including image paths | |
| listings_with_images = [ | |
| { | |
| "description": doc.page_content, | |
| "id": doc.metadata.get("id"), | |
| "city": doc.metadata.get("city", "Unknown"), | |
| "state": doc.metadata.get("state", "Unknown"), | |
| "price": doc.metadata.get("price", "N/A"), | |
| "bedrooms": doc.metadata.get("bedrooms", "N/A"), | |
| "bathrooms": doc.metadata.get("bathrooms", "N/A"), | |
| "house_size": doc.metadata.get("house_size", "N/A"), | |
| "neighborhood": doc.metadata.get("neighborhood", "Unknown"), | |
| "neighborhood_description": doc.metadata.get("neighborhood_description", ""), | |
| "image_path": doc.metadata.get("image_path", "β No image available") # Ensure image path is included | |
| } | |
| for doc in results | |
| ] | |
| return listings_with_images | |
| except Exception as e: | |
| print(f"β Error during search: {e}") | |
| return [] | |