"""Gradio app: hierarchical (Ward) clustering of consulting partners.

The user uploads an Excel sheet; the app label-encodes the categorical
columns, standardizes all features, builds a Ward linkage, and shows the
resulting dendrogram together with the input table plus a ``Cluster`` column.
"""

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Columns holding text categories that must be turned into integer codes.
CATEGORICAL_COLS = ("Firm", "Region", "Practice Area", "Industry Focus")

# Columns fed to the clustering, in the order used to build the feature matrix.
FEATURE_COLS = (
    "Firm",
    "Region",
    "Years of Experience",
    "Clients Handled",
    "Revenue Generated ($M)",
    "Practice Area",
    "Industry Focus",
)

# Column whose values label the dendrogram leaves.
LABEL_COL = "Partner Name"


def cluster_and_plot(file, num_clusters: int = 4):
    """Cluster the partners in an uploaded Excel file and draw a dendrogram.

    Args:
        file: The upload handed over by ``gr.File``. Either a filesystem path
            string or an object exposing the path as ``.name`` (the two shapes
            different Gradio versions/configurations pass).
        num_clusters: Maximum number of flat clusters to cut the hierarchy
            into (``fcluster`` with ``criterion='maxclust'``). Defaults to 4.

    Returns:
        A ``(figure, dataframe)`` tuple: the matplotlib dendrogram figure and
        the original table with an added integer ``Cluster`` column.

    Raises:
        gr.Error: If no file was uploaded, required columns are missing, there
            are fewer than two rows, the features contain missing/non-finite
            values, or ``num_clusters`` is smaller than 1.
    """
    if file is None:
        raise gr.Error("Please upload an Excel file.")

    num_clusters = int(num_clusters)
    if num_clusters < 1:
        raise gr.Error("num_clusters must be at least 1.")

    # Newer Gradio passes a plain path string; older versions pass a
    # tempfile-like wrapper whose path is in `.name`.
    path = file if isinstance(file, str) else file.name
    df = pd.read_excel(path)

    # Fail early with a readable message instead of a bare KeyError.
    required = list(dict.fromkeys((*FEATURE_COLS, LABEL_COL)))
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise gr.Error(f"Missing required column(s): {', '.join(missing)}")

    # Hierarchical clustering needs at least two observations.
    if len(df) < 2:
        raise gr.Error("At least two rows are required for clustering.")

    # Encode on a copy so the returned table keeps the original text values.
    # NOTE(review): LabelEncoder assigns arbitrary integer codes, so nominal
    # categories get an artificial ordering/distance under Ward (Euclidean)
    # linkage. Kept as-is to preserve existing results; consider one-hot.
    df_encoded = df.copy()
    for col in CATEGORICAL_COLS:
        df_encoded[col] = LabelEncoder().fit_transform(df[col])

    X = df_encoded[list(FEATURE_COLS)]
    X_scaled = StandardScaler().fit_transform(X)

    # linkage() rejects NaN/inf with a cryptic error; report it clearly.
    if not np.isfinite(X_scaled).all():
        raise gr.Error(
            "Feature columns contain missing or non-numeric-safe values; "
            "please clean the data and try again."
        )

    linkage_matrix = linkage(X_scaled, method='ward')
    df["Cluster"] = fcluster(linkage_matrix, num_clusters, criterion='maxclust')

    # Draw through the Axes/Figure objects rather than pyplot's global state,
    # so concurrent requests cannot write onto each other's figure.
    fig, ax = plt.subplots(figsize=(12, 6))
    dendrogram(
        linkage_matrix,
        labels=df[LABEL_COL].values,
        leaf_rotation=90,
        leaf_font_size=10,
        ax=ax,
    )
    ax.set_title("Dendrogram of Consulting Partners")
    ax.set_xlabel("Partner")
    ax.set_ylabel("Distance")
    fig.tight_layout()

    # Unregister the figure from pyplot so a long-running server does not
    # accumulate open figures; the Figure object itself stays renderable.
    plt.close(fig)

    return fig, df


demo = gr.Interface(
    fn=cluster_and_plot,
    inputs=gr.File(label="Upload Excel File"),
    outputs=[gr.Plot(label="Dendrogram"), gr.Dataframe(label="Clustered Data")],
    title="Big 4 Consulting Partners Clustering",
    description="Upload an Excel sheet of partner data to view hierarchical clusters.",
)

if __name__ == "__main__":
    demo.launch()