# File size: 1,571 Bytes
# Revision: 6b4aaf6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from sklearn.preprocessing import StandardScaler, LabelEncoder
import gradio as gr

def cluster_and_plot(file, num_clusters=4):
    """Cluster the rows of an uploaded Excel sheet and draw their dendrogram.

    The sheet is read with pandas, the categorical columns are label-encoded,
    all feature columns are standardized, and Ward-linkage hierarchical
    clustering is applied. Each row is then assigned to one of at most
    ``num_clusters`` flat clusters.

    Args:
        file: The uploaded file, either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
        num_clusters: Maximum number of flat clusters passed to ``fcluster``
            with ``criterion='maxclust'``. Defaults to 4.

    Returns:
        A ``(fig, df)`` tuple: the matplotlib figure holding the dendrogram
        and the loaded DataFrame with an added ``"Cluster"`` column.

    Raises:
        ValueError: If no file was provided, a required column is missing,
            or the sheet has fewer than two rows.
    """
    import os

    from matplotlib.figure import Figure

    if file is None:
        raise ValueError("No file uploaded; please provide an Excel file.")

    # Accept both a plain path and a temp-file wrapper that carries the path
    # in ``.name``.
    path = file if isinstance(file, (str, os.PathLike)) else file.name
    df = pd.read_excel(path)

    categorical_cols = ["Firm", "Region", "Practice Area", "Industry Focus"]
    features = ["Firm", "Region", "Years of Experience", "Clients Handled",
                "Revenue Generated ($M)", "Practice Area", "Industry Focus"]
    label_col = "Partner Name"

    # Fail early with a readable message instead of a bare KeyError later on.
    missing = [col for col in (*features, label_col) if col not in df.columns]
    if missing:
        raise ValueError(f"Missing required column(s): {', '.join(missing)}")
    if len(df) < 2:
        raise ValueError(
            "At least two rows are required for hierarchical clustering."
        )

    # Replace each categorical column with integer codes so that every
    # feature is numeric before scaling.
    df_encoded = df.copy()
    for col in categorical_cols:
        df_encoded[col] = LabelEncoder().fit_transform(df[col])

    X_scaled = StandardScaler().fit_transform(df_encoded[features])

    linkage_matrix = linkage(X_scaled, method="ward")
    df["Cluster"] = fcluster(linkage_matrix, num_clusters, criterion="maxclust")

    # Build the figure without pyplot: pyplot keeps a global registry and a
    # "current figure", which leaks figures and is unsafe when several
    # requests are served concurrently.
    fig = Figure(figsize=(12, 6))
    ax = fig.subplots()
    dendrogram(
        linkage_matrix,
        labels=df[label_col].values,
        leaf_rotation=90,
        leaf_font_size=10,
        ax=ax,
    )
    ax.set_title("Dendrogram of Consulting Partners")
    ax.set_xlabel("Partner")
    ax.set_ylabel("Distance")
    fig.tight_layout()

    return fig, df

# Gradio UI: one Excel upload is passed to cluster_and_plot, and its two
# return values fill the dendrogram plot and the clustered-data table.
demo = gr.Interface(
    title="Big 4 Consulting Partners Clustering",
    description="Upload an Excel sheet of partner data to view hierarchical clusters.",
    fn=cluster_and_plot,
    inputs=gr.File(label="Upload Excel File"),
    outputs=[
        gr.Plot(label="Dendrogram"),
        gr.Dataframe(label="Clustered Data"),
    ],
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()