Spaces:
Sleeping
Sleeping
File size: 1,571 Bytes
6b4aaf6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.preprocessing import StandardScaler, LabelEncoder
import gradio as gr
def cluster_and_plot(file, num_clusters=4):
    """Cluster consulting partners hierarchically and draw their dendrogram.

    Reads an Excel sheet, label-encodes the categorical columns, standardizes
    all features, builds a Ward linkage and cuts the tree into flat clusters.

    Args:
        file: The uploaded workbook, given either as a filesystem path or as
            an object that exposes the path through a ``name`` attribute.
        num_clusters: Maximum number of flat clusters to form (passed to
            ``fcluster`` with ``criterion='maxclust'``). Defaults to 4.

    Returns:
        A ``(fig, df)`` tuple: the matplotlib figure holding the dendrogram
        and the loaded data with an added ``"Cluster"`` column.

    Raises:
        ValueError: If ``num_clusters`` is below 1, no file was provided,
            required columns are missing, fewer than two rows are present,
            or the scaled features contain non-finite values.
    """
    # Validate arguments before any I/O so bad calls fail fast and clearly.
    if num_clusters < 1:
        raise ValueError("num_clusters must be at least 1.")
    if file is None:
        raise ValueError("No file provided: please upload an Excel file.")

    # Depending on the Gradio version/configuration, the File component hands
    # over either a path or a temp-file wrapper whose ``name`` is the path.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        path = file
    else:
        path = file.name
    df = pd.read_excel(path)

    label_col = "Partner Name"
    categorical_cols = ["Firm", "Region", "Practice Area", "Industry Focus"]
    features = ["Firm", "Region", "Years of Experience", "Clients Handled",
                "Revenue Generated ($M)", "Practice Area", "Industry Focus"]

    # Report every absent column at once instead of a bare KeyError.
    missing = [col for col in [label_col, *features] if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required column(s): {', '.join(missing)}")
    if len(df) < 2:
        raise ValueError("At least two rows are required to build a hierarchy.")

    # NOTE(review): label encoding imposes an arbitrary order on nominal
    # categories; kept as-is so cluster assignments do not change.
    df_encoded = df.copy()
    for col in categorical_cols:
        df_encoded[col] = LabelEncoder().fit_transform(df[col])

    X_scaled = StandardScaler().fit_transform(df_encoded[features])
    # linkage() rejects NaN/inf anyway; fail here with an actionable message.
    if not np.isfinite(X_scaled).all():
        raise ValueError("Feature columns contain missing or non-finite values.")

    linkage_matrix = linkage(X_scaled, method='ward')
    df["Cluster"] = fcluster(linkage_matrix, num_clusters, criterion='maxclust')

    fig, ax = plt.subplots(figsize=(12, 6))
    dendrogram(linkage_matrix, labels=df[label_col].values, leaf_rotation=90,
               leaf_font_size=10, ax=ax)
    # Use the Axes/Figure objects directly rather than pyplot's global
    # "current figure", which may belong to another request in a server.
    ax.set_title("Dendrogram of Consulting Partners")
    ax.set_xlabel("Partner")
    ax.set_ylabel("Distance")
    fig.tight_layout()
    # Unregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the returned Figure object can still be rendered/saved.
    plt.close(fig)
    return fig, df
# Build the UI components up front so the Interface call below stays compact.
_upload_input = gr.File(label="Upload Excel File")
_result_outputs = [
    gr.Plot(label="Dendrogram"),
    gr.Dataframe(label="Clustered Data"),
]

# Web UI wiring: one uploaded workbook in, dendrogram plus clustered table out.
demo = gr.Interface(
    fn=cluster_and_plot,
    inputs=_upload_input,
    outputs=_result_outputs,
    title="Big 4 Consulting Partners Clustering",
    description="Upload an Excel sheet of partner data to view hierarchical clusters.",
)

if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    demo.launch()
|