Spaces:

Yatheshr
/

Investor_Segmentation

Sleeping

App Files Files Community

Investor_Segmentation / app.py

Yatheshr

Update app.py

b2bacc0 verified 10 months ago

raw

history blame contribute delete

2.06 kB

	import joblib
	import pandas as pd
	import matplotlib.pyplot as plt
	import gradio as gr

	# Restore the fitted artifacts once at start-up so every request reuses
	# them: first the fund-type encoder, then the clustering model (described
	# as KMeans by the original author).
	# NOTE: joblib.load unpickles its input; only ship trusted .pkl files.
	encoder, model = map(
		joblib.load,
		('fund_type_encoder.pkl', 'investor_segmentation.pkl'),
	)

	def analyze_investors(file):
		"""Segment the investors in an uploaded CSV and plot the result.

		The CSV must contain the columns ``Fund_Type_Viewed_Most`` (passed to
		the pre-trained encoder and model) and ``News_Reads_Per_Week`` (summed
		per segment). Each predicted cluster is labelled with the fund type
		that occurs most often among its rows, so clusters sharing the same
		dominant fund type are reported as one segment.

		Args:
			file: The value delivered by the ``gr.File`` input: either a path
				string or an object exposing the path as ``.name``.

		Returns:
			A matplotlib figure with a donut chart of segment sizes on the
			left and a text box listing total weekly news reads per segment
			on the right.

		Raises:
			gr.Error: If no file was uploaded, the CSV has no rows, or a
				required column is missing.
		"""
		if file is None:
			raise gr.Error("Please upload a CSV file.")

		# Depending on the Gradio version/configuration, the File component
		# yields either a plain path string or a tempfile-like object whose
		# path is stored in ``.name``; accept both.
		csv_path = file if isinstance(file, str) else file.name
		df = pd.read_csv(csv_path)

		# Fail early with a readable message instead of an opaque KeyError
		# raised from deep inside pandas or the encoder.
		required_columns = ('Fund_Type_Viewed_Most', 'News_Reads_Per_Week')
		missing = [col for col in required_columns if col not in df.columns]
		if missing:
			raise gr.Error(
				f"CSV is missing required column(s): {', '.join(missing)}"
			)
		if df.empty:
			raise gr.Error("The uploaded CSV contains no rows.")

		# Encode the fund-type column, then assign every row to a cluster.
		X_encoded = encoder.transform(df[['Fund_Type_Viewed_Most']])
		df['Cluster'] = model.predict(X_encoded)

		# Name each cluster after its most frequently viewed fund type.
		cluster_labels = (
			df.groupby('Cluster')['Fund_Type_Viewed_Most']
			.agg(lambda x: x.value_counts().idxmax())
			.to_dict()
		)
		df['Cluster_Label'] = df['Cluster'].map(cluster_labels).astype(str)

		# Data for the donut chart (rows per segment) and the text box
		# (total weekly news reads per segment).
		cluster_counts = df['Cluster_Label'].value_counts()
		cluster_summary = df.groupby('Cluster_Label')['News_Reads_Per_Week'].sum()
		text_str = "\n".join(
			f"{label}: {reads} reads/week"
			for label, reads in cluster_summary.items()
		)

		fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

		# Donut chart: a pie whose wedges only span the outer 30% of the radius.
		ax1.pie(
			cluster_counts,
			labels=cluster_counts.index,
			autopct='%1.1f%%',
			startangle=90,
			wedgeprops={'width': 0.3},
		)
		ax1.set_title('Fund Type Distribution')
		ax1.axis('equal')

		# Text box: hide the axes and anchor the summary at the top-right.
		ax2.axis('off')
		props = dict(boxstyle='round', facecolor='lightgrey', alpha=0.4)
		ax2.text(0.95, 0.9, text_str, fontsize=12, va='top', ha='right', bbox=props)
		ax2.set_title('Total News Reads per Week', loc='right')

		# pyplot keeps a reference to every figure it creates. Unregister this
		# one so repeated requests do not pile up figures in memory; the
		# Figure object itself remains usable for rendering by the caller.
		plt.close(fig)
		return fig

	# Wire the analysis function into a one-input/one-output Gradio UI:
	# a CSV upload goes in, the segmentation figure comes out.
	csv_upload = gr.File(label="Upload Investors CSV")
	cluster_plot = gr.Plot(label="Investors Clusters Visualization")
	demo = gr.Interface(
		fn=analyze_investors,
		inputs=csv_upload,
		outputs=cluster_plot,
	)
	# Start the web server for the interface.
	demo.launch()