Spaces:
Build error
Build error
File size: 6,345 Bytes
abc7a1c 03197ad abc7a1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
"""
Module: vector_database
Description: Handles storing and searching real estate listings using ChromaDB and OpenAI embeddings.
"""
import json
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
class VectorDatabase:
    """Store and search real estate listings with ChromaDB and OpenAI embeddings.

    Listings are read from a JSON file, converted to ``Document`` objects
    (description as content, remaining fields as metadata) and can then be
    persisted to a Chroma collection and queried by similarity.
    """

    # Maps each metadata key stored on a Document to the listing key it is read from.
    _METADATA_FIELDS = {
        "id": "id",
        "property_type": "Property Type",
        "neighborhood": "Neighborhood",
        "city": "City",
        "state": "State",
        "price": "Price",
        "house_size": "House Size",
        "bedrooms": "Bedrooms",
        "bathrooms": "Bathrooms",
        "neighborhood_description": "Neighborhood Description",
        "image_path": "image_path",
    }

    def __init__(self, listings_path="Data/listings.json", db_path="Data/chroma_langchain_db"):
        """
        Load the listings, build their documents and open the Chroma collection.

        Args:
            listings_path (str): Path to the JSON file containing real estate listings.
            db_path (str): Path to the directory where ChromaDB stores embeddings.
        """
        self.listings_path = listings_path
        self.db_path = db_path
        self.embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

        # Load and process listings
        self.listings = self._load_listings()
        self.documents = self._prepare_documents()

        # Initialize ChromaDB for storage
        self.vector_store = Chroma(
            collection_name="real_estate_listings",
            embedding_function=self.embedding_model,
            persist_directory=self.db_path,
        )

    def _load_listings(self):
        """
        Load real estate listings from the JSON file at ``self.listings_path``.

        Returns:
            list: The parsed listings, or an empty list when the file cannot be
            read, is not valid JSON, or does not contain a JSON array.
        """
        try:
            # Explicit UTF-8 so non-ASCII listing text does not depend on the locale.
            with open(self.listings_path, "r", encoding="utf-8") as f:
                listings = json.load(f)
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"❌ Error loading listings file: {e}")
            return []

        if not isinstance(listings, list):
            print(
                "❌ Error loading listings file: expected a JSON array of listings, "
                f"got {type(listings).__name__}"
            )
            return []
        return listings

    def _prepare_documents(self):
        """
        Convert the loaded listings into Document objects with metadata.

        A listing that is not a mapping or lacks one of the expected keys is
        skipped with a message instead of aborting the whole conversion.

        Returns:
            list: One Document per well-formed listing.
        """
        documents = []
        for index, listing in enumerate(self.listings):
            try:
                document = Document(
                    page_content=listing["Description"],  # Store property description
                    metadata={
                        meta_key: listing[source_key]
                        for meta_key, source_key in self._METADATA_FIELDS.items()
                    },
                )
            except (KeyError, TypeError) as e:
                print(f"❌ Skipping malformed listing at index {index}: {e!r}")
                continue
            documents.append(document)
        return documents

    def store_listings(self):
        """
        Store the prepared listing documents in ChromaDB.

        Each document is stored under an id derived from its listing id so that
        calling this method again updates the existing entries rather than
        adding duplicates to the persisted collection. When several documents
        share a listing id, only the last one is stored.
        """
        if not self.documents:
            print("❌ Error storing listings: no listings to store.")
            return

        try:
            # Keyed by id: keeps ids unique within the batch (last occurrence wins).
            documents_by_id = {
                str(doc.metadata["id"]): doc for doc in self.documents
            }
            self.vector_store.add_documents(
                list(documents_by_id.values()),
                ids=list(documents_by_id),
            )
            print("✅ Listings successfully stored in ChromaDB!")
        except Exception as e:
            print(f"❌ Error storing listings: {e}")

    @staticmethod
    def _join_values(value):
        """Render a preference value (None, a string, or an iterable) as comma-separated text."""
        if value is None:
            return ""
        if isinstance(value, str):
            # A bare string is a single value; joining it would split it into characters.
            return value
        try:
            return ", ".join(str(item) for item in value)
        except TypeError:
            # Not iterable (e.g. a number): fall back to its string form.
            return str(value)

    def format_user_prefs(self, user_prefs):
        """
        Convert structured user preferences into a readable search query.

        ``city``, ``state`` and ``amenities`` may each be a list of values, a
        single string, or missing/None.

        Args:
            user_prefs (dict): Dictionary containing user preferences.

        Returns:
            str: A natural language query string for embedding, or an empty
            string when ``user_prefs`` is not a mapping.
        """
        try:
            city = self._join_values(user_prefs.get("city"))
            state = self._join_values(user_prefs.get("state"))
            amenities = self._join_values(user_prefs.get("amenities"))
            return (
                f"Looking for a property in {city}, {state}. "
                f"House size preference: {user_prefs.get('house_size', 'any size')}. "
                f"Maximum price: {user_prefs.get('max_price', '100000')}. "
                f"Number of Bedrooms: {user_prefs.get('num_bedrooms', 3)}. "
                f"Number of Bathrooms: {user_prefs.get('num_bathrooms', 3)}. "
                f"Amenities: {amenities}. "
                f"Property description: {user_prefs.get('description', 'no preference')}."
            )
        except (AttributeError, TypeError) as e:
            print(f"❌ Error formatting user preferences: {e}")
            return ""

    def search(self, user_prefs, k=5):
        """
        Run a similarity search for the given preferences and return matching listings.

        Args:
            user_prefs (dict): Dictionary containing user search preferences.
            k (int): Number of top matches to return.

        Returns:
            list: A list of dictionaries containing listing details and image
            paths; empty when the query could not be built or the search failed.
        """
        try:
            # Convert user preferences into a natural language query
            query = self.format_user_prefs(user_prefs)
            if not query:
                # Formatting failed (already reported); there is nothing to embed.
                return []

            # Generate embeddings for the query
            query_embedding = self.embedding_model.embed_query(query)
            if not isinstance(query_embedding, list):
                raise ValueError("❌ Embedding function did not return a valid vector list.")

            # Perform similarity search using the embedding
            results = self.vector_store.similarity_search_by_vector(query_embedding, k=k)

            # Extract relevant metadata, including image paths
            return [
                {
                    "description": doc.page_content,
                    "id": doc.metadata.get("id"),
                    "city": doc.metadata.get("city", "Unknown"),
                    "state": doc.metadata.get("state", "Unknown"),
                    "price": doc.metadata.get("price", "N/A"),
                    "bedrooms": doc.metadata.get("bedrooms", "N/A"),
                    "bathrooms": doc.metadata.get("bathrooms", "N/A"),
                    "house_size": doc.metadata.get("house_size", "N/A"),
                    "neighborhood": doc.metadata.get("neighborhood", "Unknown"),
                    "neighborhood_description": doc.metadata.get("neighborhood_description", ""),
                    "image_path": doc.metadata.get("image_path", "❌ No image available"),
                }
                for doc in results
            ]
        except Exception as e:
            print(f"❌ Error during search: {e}")
            return []
|