"""Streamlit demo that recommends real-estate agents for a client profile.

Agents and the client are each described by a short bag of words (location,
property type, language, amenities). The words are count-vectorized and the
agents are ranked by cosine similarity to the client.
"""

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Streamlit re-executes this script on every widget interaction. A fixed seed
# keeps each dummy agent's completion rate the same from one rerun to the
# next instead of re-rolling it on every click.
_DEMO_SEED = 42

# Dummy data for agents (20 rows).
agents = pd.DataFrame({
    'agent_id': range(1, 21),
    'location_expertise': ['Toronto', 'Vancouver', 'Montreal'] * 6 + ['Toronto', 'Vancouver'],
    'property_type_expertise': ['Condo', 'House', 'Apartment'] * 6 + ['Condo', 'House'],
    'average_deal_completion_rate': np.random.default_rng(_DEMO_SEED).random(20),
    'language': ['English', 'French', 'Hindi', 'English', 'French',
                 'English', 'Hindi', 'French', 'English', 'Hindi'] * 2,
    'amenities': ['Pool,Gym', 'Gym,Parking', 'Pool,Parking,Gym', 'Parking', 'Pool,Gym',
                  'Gym,Parking', 'Pool', 'Gym', 'Parking,Pool', 'Pool'] * 2,
})


def calculate_similarity(client_features, agents, top_n=3):
    """Rank agents by cosine similarity to the client's stated preferences.

    Args:
        client_features: Mapping with string values under the keys
            'preferred_location', 'preferred_property_type', 'language' and
            'amenities'. Any other keys (e.g. 'max_budget') are ignored.
        agents: DataFrame with string columns 'location_expertise',
            'property_type_expertise', 'language' and 'amenities'. It is not
            modified.
        top_n: Maximum number of agents to return (default 3).

    Returns:
        An iterator of ``(score, agent_row)`` pairs, best match first.
        ``agent_row`` is a Series holding the agent's columns plus the
        derived 'combined_features' text. The iterator is empty when
        ``agents`` has no rows or ``top_n`` is 0.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")
    if agents.empty or top_n == 0:
        # Nothing to rank; cosine_similarity would reject an empty matrix.
        return zip((), ())

    client_text = " ".join(
        client_features[key]
        for key in ('preferred_location', 'preferred_property_type', 'language', 'amenities')
    )

    # assign() returns a new frame, so the caller's DataFrame is left untouched
    # while the returned rows still carry the 'combined_features' column.
    profiled = agents.assign(
        combined_features=(
            agents['location_expertise'] + " " + agents['property_type_expertise']
            + " " + agents['language'] + " " + agents['amenities']
        )
    )

    # Row 0 of the count matrix is the client; the remaining rows are the
    # agents in their original order.
    corpus = [client_text, *profiled['combined_features']]
    count_matrix = CountVectorizer().fit_transform(corpus)
    scores = cosine_similarity(count_matrix[0:1], count_matrix[1:])[0]

    # Positions of the highest scores, best first.
    ranked = np.argsort(scores)[::-1][:top_n]
    return zip([scores[i] for i in ranked], [profiled.iloc[i] for i in ranked])


# Streamlit app
st.title("Real Estate Agent Recommender")

# Input fields
preferred_location = st.selectbox("Preferred Location", ['Toronto', 'Vancouver', 'Montreal'])
preferred_property_type = st.selectbox("Preferred Property Type", ['Condo', 'House', 'Apartment'])
max_budget = st.slider("Maximum Budget", 100000, 1000000, 300000, 10000)
language = st.selectbox("Preferred Agent Language", ['English', 'French', 'Hindi'])
amenities = st.multiselect("Preferred Amenities", ['Pool', 'Gym', 'Parking'])

# Button to get recommendations
if st.button("Get Recommendations"):
    client_features = {
        'preferred_location': preferred_location,
        'preferred_property_type': preferred_property_type,
        # Collected for completeness; the similarity score does not use it.
        'max_budget': max_budget,
        'language': language,
        'amenities': ",".join(amenities),
    }

    recommendations = calculate_similarity(client_features, agents)

    # One table row per recommended agent, ranked from 1.
    recommendation_data = [
        [
            rank,
            score,
            agent['agent_id'],
            agent['location_expertise'],
            agent['property_type_expertise'],
            agent['average_deal_completion_rate'],
            agent['language'],
            agent['amenities'],
        ]
        for rank, (score, agent) in enumerate(recommendations, start=1)
    ]
    recommendation_df = pd.DataFrame(
        recommendation_data,
        columns=['Rank', 'Score', 'Agent ID', 'Location Expertise', 'Property Type Expertise',
                 'Average Deal Completion Rate', 'Language', 'Amenities'],
    )
    st.table(recommendation_df)