Nagaraj81's picture
Upload 3 files
6b4aaf6 verified
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.preprocessing import StandardScaler, LabelEncoder
import gradio as gr
def cluster_and_plot(file, num_clusters=4):
    """Cluster consulting partners hierarchically and draw the dendrogram.

    Reads the uploaded Excel sheet, label-encodes the categorical columns,
    standardizes every feature, builds a Ward linkage over the scaled
    features, and cuts the tree into at most ``num_clusters`` flat clusters.

    Args:
        file: The uploaded workbook, given either as a path string or as an
            object exposing the path through a ``.name`` attribute.
        num_clusters: Maximum number of flat clusters requested from
            ``fcluster`` (``criterion='maxclust'``). Defaults to 4.

    Returns:
        A ``(figure, dataframe)`` tuple: the matplotlib figure holding the
        dendrogram (leaves labelled with "Partner Name") and the input data
        with an added "Cluster" column.

    Raises:
        ValueError: If no file was supplied, ``num_clusters`` is below 1, or
            the sheet has fewer than two rows.
        KeyError: If a required column is missing from the sheet.
    """
    # Imported here so the block stays self-contained; building the Figure
    # directly (instead of through pyplot) keeps it out of pyplot's global
    # figure registry, so it is garbage-collected once the caller drops it.
    from matplotlib.figure import Figure

    if file is None:
        raise ValueError("No file was uploaded.")
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1.")

    # Accept a plain path as well as a wrapper object carrying `.name`.
    path = file if isinstance(file, str) else file.name
    df = pd.read_excel(path)

    categorical_cols = ["Firm", "Region", "Practice Area", "Industry Focus"]
    features = ["Firm", "Region", "Years of Experience", "Clients Handled",
                "Revenue Generated ($M)", "Practice Area", "Industry Focus"]

    # Fail early with one clear message instead of a bare KeyError later on.
    missing = [col for col in [*features, "Partner Name"] if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")
    if len(df) < 2:
        raise ValueError("At least two partner rows are required for clustering.")

    # NOTE(review): label encoding assigns nominal categories an arbitrary
    # numeric order that the distance computation then treats as meaningful.
    # Kept as-is so existing cluster assignments do not change.
    X = df[features].copy()
    for col in categorical_cols:
        X[col] = LabelEncoder().fit_transform(X[col])

    X_scaled = StandardScaler().fit_transform(X)
    linkage_matrix = linkage(X_scaled, method='ward')
    df["Cluster"] = fcluster(linkage_matrix, num_clusters, criterion='maxclust')

    fig = Figure(figsize=(12, 6))
    ax = fig.subplots()
    dendrogram(
        linkage_matrix,
        labels=df["Partner Name"].to_numpy(),
        leaf_rotation=90,
        leaf_font_size=10,
        ax=ax,
    )
    # Use the Axes/Figure API rather than pyplot's implicit "current figure",
    # which is shared process-wide and unsafe when requests overlap.
    ax.set_title("Dendrogram of Consulting Partners")
    ax.set_xlabel("Partner")
    ax.set_ylabel("Distance")
    fig.tight_layout()
    return fig, df
# Gradio front end: a single Excel upload goes in; the dendrogram plot and the
# clustered table come out.
demo = gr.Interface(
    cluster_and_plot,
    gr.File(label="Upload Excel File"),
    [
        gr.Plot(label="Dendrogram"),
        gr.Dataframe(label="Clustered Data"),
    ],
    title="Big 4 Consulting Partners Clustering",
    description="Upload an Excel sheet of partner data to view hierarchical clusters.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()