File size: 3,333 Bytes
3c24daa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Dummy data for agents: 20 synthetic rows. The completion rate is drawn at
# random each time the script runs; every other column follows a fixed,
# repeating pattern.
agents = pd.DataFrame(
    data={
        "agent_id": range(1, 21),
        # Three-value cycles truncated to 20 entries.
        "location_expertise": (["Toronto", "Vancouver", "Montreal"] * 7)[:20],
        "property_type_expertise": (["Condo", "House", "Apartment"] * 7)[:20],
        "average_deal_completion_rate": np.random.random(size=20),
        # Ten-value patterns repeated twice.
        "language": 2 * [
            "English", "French", "Hindi", "English", "French",
            "English", "Hindi", "French", "English", "Hindi",
        ],
        "amenities": 2 * [
            "Pool,Gym", "Gym,Parking", "Pool,Parking,Gym", "Parking", "Pool,Gym",
            "Gym,Parking", "Pool", "Gym", "Parking,Pool", "Pool",
        ],
    }
)

# Rank agents by cosine similarity to the client's stated preferences
def calculate_similarity(client_features, agents, top_n=3):
    """Rank agents by bag-of-words cosine similarity to a client's preferences.

    The client's location, property type, language and amenities are joined
    into a single text profile, and each agent gets the same treatment from
    the matching columns. All profiles are vectorised with one shared
    ``CountVectorizer`` vocabulary and the client vector is compared with
    every agent vector.

    Args:
        client_features: Mapping with string values under the keys
            ``'preferred_location'``, ``'preferred_property_type'``,
            ``'language'`` and ``'amenities'``. Other keys are not read.
        agents: DataFrame with string columns ``'location_expertise'``,
            ``'property_type_expertise'``, ``'language'`` and ``'amenities'``.
            The frame passed in is left unmodified.
        top_n: Maximum number of agents to return. Defaults to 3.

    Returns:
        A ``zip`` iterator of ``(score, agent_row)`` pairs, highest score
        first. Each ``agent_row`` is a Series with the agent's columns plus
        the derived ``'combined_features'`` text. The iterator is empty when
        ``agents`` has no rows or ``top_n`` is not positive.
    """
    # Guard the degenerate cases: a zero-row agent matrix cannot be scored,
    # and a slice of [-0:] would select every agent instead of none.
    if top_n <= 0 or len(agents) == 0:
        return zip([], [])

    client_profile = " ".join(
        client_features[key]
        for key in ('preferred_location', 'preferred_property_type', 'language', 'amenities')
    )

    # assign() builds a new frame, so the caller's DataFrame does not gain a
    # 'combined_features' column as a side effect of asking for a ranking.
    profiled = agents.assign(
        combined_features=(
            agents['location_expertise'] + " "
            + agents['property_type_expertise'] + " "
            + agents['language'] + " "
            + agents['amenities']
        )
    )

    # Row 0 is the client; rows 1.. are the agents, in DataFrame order.
    vectorizer = CountVectorizer()
    count_matrix = vectorizer.fit_transform([client_profile, *profiled['combined_features']])

    # One row of scores: client versus every agent.
    scores = cosine_similarity(count_matrix[0:1], count_matrix[1:])[0]

    # argsort is ascending, so take the tail and reverse it for best-first.
    best_positions = np.argsort(scores)[-top_n:][::-1]
    top_agents_scores = [scores[i] for i in best_positions]
    top_agents = [profiled.iloc[i] for i in best_positions]
    return zip(top_agents_scores, top_agents)

# Streamlit app: collect the client's preferences, then show the best-matching
# agents in a table once the button is pressed.
st.title("Real Estate Agent Recommender")

# Preference widgets
preferred_location = st.selectbox("Preferred Location", ['Toronto', 'Vancouver', 'Montreal'])
preferred_property_type = st.selectbox("Preferred Property Type", ['Condo', 'House', 'Apartment'])
max_budget = st.slider("Maximum Budget", 100000, 1000000, 300000, 10000)
language = st.selectbox("Preferred Agent Language", ['English', 'French', 'Hindi'])
amenities = st.multiselect("Preferred Amenities", ['Pool', 'Gym', 'Parking'])

# Recommendations are only computed on the run triggered by the button
if st.button("Get Recommendations"):
    # NOTE: max_budget is forwarded here, but calculate_similarity only reads
    # the location, property type, language and amenities entries.
    client_features = dict(
        preferred_location=preferred_location,
        preferred_property_type=preferred_property_type,
        max_budget=max_budget,
        language=language,
        amenities=",".join(amenities),
    )
    recommendations = calculate_similarity(client_features, agents)

    # One table row per recommended agent, ranked from 1 in the order returned
    table_columns = [
        'Rank',
        'Score',
        'Agent ID',
        'Location Expertise',
        'Property Type Expertise',
        'Average Deal Completion Rate',
        'Language',
        'Amenities',
    ]
    recommendation_data = [
        [
            rank,
            score,
            agent['agent_id'],
            agent['location_expertise'],
            agent['property_type_expertise'],
            agent['average_deal_completion_rate'],
            agent['language'],
            agent['amenities'],
        ]
        for rank, (score, agent) in enumerate(recommendations, start=1)
    ]
    recommendation_df = pd.DataFrame(recommendation_data, columns=table_columns)
    st.table(recommendation_df)