Spaces:

sdbrgo
/

PERCEUL

Sleeping

App Files Files Community

sdbrgo committed on Dec 16, 2025

Commit

de583e8

verified ·

1 Parent(s): f750436

Update app.py

Browse files

Added final_clustering() and a Gradio interface; still needs a Cluster Exploration tab and a visualization of the KMeans clusters.

Files changed (1) hide show

app.py +55 -4

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -5,10 +6,60 @@ import joblib
 import json
 # custom functions
-from cluster_utils import choose_k,
-compute_cluster_centroids_pca, inverse_project_centroids, compute_cluster_stats,
-identify_top_drivers
 exploration_pipeline = joblib.load("exploration_pipeline.pkl")
-core_pipeline = joblib.load("core_pipeline.pkl")

+import gradio as gr # used to build simple interface
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import json
 # custom functions
+from cluster_utils import *
 exploration_pipeline = joblib.load("exploration_pipeline.pkl")
+#========== HELPER FUNCTIONS ==========
+def final_clustering(file, top_features):
+    df = pd.read_csv(file)
+    core_pipeline = joblib.load("core_pipeline.pkl")
+    X_pca = core_pipeline.transform(df)
+    # dynamic `k` selection
+    # choose_k() is from cluster_utils
+    best_k = choose_k(X_pca)
+    kmeans = KMeans(
+        n_clusters=best_k,
+        random_state=42,
+        n_init="auto"
+    )
+    labels = kmeans.fit_predict(X_pca)
+    # Cluster Analysis
+    pca = pipeline.names_steps["pca"]
+    scaler = pipeline.names_steps["scaler"]
+    feature_names = df.columns.tolist()
+    centroids = compute_cluster_centroids(X_pca, labels) # function is from cluster_utils
+    original_centroids = inverse_project_centroids(
+        centroids,
+        pca,
+        scaler,
+        feature_names
+    )
+    top_drivers = identify_top_drivers(original_centroids, top_features)
+    return best_k, top_drivers
+#========== GRADIO INTERFACE ==========
+with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
+    gr.markdown("# 🧠 Worker Profiling & Cluster Analysis")
+    with gr.Tab("Final Clustering"):
+        file_input_final = gr.File(label="Upload CSV")
+        top_features = gr.Slider(3, 10, value=5, label="Show Top `n` Features")
+        run_btn = gr.Button("Run Final Clustering")
+        best_k_out = gr.Textbox(label="Selected K")
+        drivers_out = gr.JSON(label="Top Feature Drivers per Cluster")
+        run_btn.click(
+            final_clustering,
+            inputs=[file_input_final, top_features]
+            outputs=[best_k_out, drivers_out]
+        )
+app.launch()