# Yatheshr's picture
# Update app.py
# b2bacc0 verified
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import gradio as gr
# Restore the two fitted artifacts in one pass: first the fund-type encoder,
# then the pre-trained KMeans segmentation model. Both paths are relative,
# so they resolve against the current working directory.
encoder, model = map(
    joblib.load,
    ('fund_type_encoder.pkl', 'investor_segmentation.pkl'),
)
# Function to process the uploaded file and create plot
def analyze_investors(file):
    """Cluster the investors in an uploaded CSV and return a summary figure.

    The CSV must provide the columns ``Fund_Type_Viewed_Most`` (passed through
    the module-level ``encoder`` and ``model`` to obtain a cluster per row)
    and ``News_Reads_Per_Week`` (summed per cluster label for the text panel).

    Args:
        file: The upload handed over by ``gr.File``. Either an object exposing
            the path of the uploaded file as ``.name`` or the path as a plain
            string; which one arrives depends on the Gradio version and the
            component's ``type`` setting.

    Returns:
        A matplotlib ``Figure`` with two panels: a donut chart of the share
        of rows per cluster label (left) and a text box listing the total
        news reads per week for each label (right).

    Raises:
        gr.Error: If no file was uploaded, the CSV has no rows, or one of the
            required columns is missing.
    """
    if file is None:
        raise gr.Error("Please upload a CSV file.")

    # Accept both upload styles: a path string, or a wrapper with `.name`.
    csv_path = file if isinstance(file, str) else file.name
    df = pd.read_csv(csv_path)

    # Fail early with a readable message instead of a KeyError deep inside
    # the encoder / groupby calls below.
    required_columns = ("Fund_Type_Viewed_Most", "News_Reads_Per_Week")
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise gr.Error(
            f"CSV is missing required column(s): {', '.join(missing)}"
        )
    if df.empty:
        raise gr.Error("The uploaded CSV contains no rows.")

    # Encode the fund type and assign every row to a cluster.
    X_encoded = encoder.transform(df[["Fund_Type_Viewed_Most"]])
    df["Cluster"] = model.predict(X_encoded)

    # Name each cluster after the fund type that occurs most often in it.
    # Clusters sharing the same dominant fund type end up with the same label
    # and are therefore merged in the chart and the summary.
    cluster_labels = (
        df.groupby("Cluster")["Fund_Type_Viewed_Most"]
        .agg(lambda x: x.value_counts().idxmax())
        .to_dict()
    )
    df["Cluster_Label"] = df["Cluster"].map(cluster_labels).astype(str)

    # Data for the donut chart (rows per label) and the text box
    # (total weekly news reads per label).
    cluster_counts = df["Cluster_Label"].value_counts()
    cluster_summary = df.groupby("Cluster_Label")["News_Reads_Per_Week"].sum()
    text_str = "\n".join(
        f"{label}: {reads} reads/week"
        for label, reads in cluster_summary.items()
    )

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

    # Donut chart: a pie whose wedges only span the outer 30% of the radius.
    ax1.pie(
        cluster_counts,
        labels=cluster_counts.index,
        autopct="%1.1f%%",
        startangle=90,
        wedgeprops={"width": 0.3},
    )
    ax1.set_title("Fund Type Distribution")
    ax1.axis("equal")

    # Text box: hide the axes and anchor the summary at the top-right corner.
    ax2.axis("off")
    props = dict(boxstyle="round", facecolor="lightgrey", alpha=0.4)
    ax2.text(0.95, 0.9, text_str, fontsize=12, va="top", ha="right", bbox=props)
    ax2.set_title("Total News Reads per Week", loc="right")

    # pyplot keeps every figure it creates registered until it is closed;
    # unregister this one so repeated requests do not pile up open figures.
    # The returned Figure object itself remains valid for rendering.
    plt.close(fig)
    return fig
# Wire analyze_investors into a Gradio UI (CSV upload in, plot out),
# keep a handle on the interface, and start serving it.
demo = gr.Interface(
    fn=analyze_investors,
    inputs=gr.File(label="Upload Investors CSV"),
    outputs=gr.Plot(label="Investors Clusters Visualization"),
)
demo.launch()