sdbrgo committed on
Commit
de583e8
·
verified ·
1 Parent(s): f750436

Update app.py

Browse files

Added final_clustering() and a Gradio interface; still needs a Cluster Exploration tab and a visualization of the KMeans clusters.

Files changed (1) hide show
  1. app.py +55 -4
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import numpy as np
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
@@ -5,10 +6,60 @@ import joblib
5
  import json
6
 
7
  # custom functions
8
- from cluster_utils import choose_k,
9
- compute_cluster_centroids_pca, inverse_project_centroids, compute_cluster_stats,
10
- identify_top_drivers
11
 
12
  exploration_pipeline = joblib.load("exploration_pipeline.pkl")
13
- core_pipeline = joblib.load("core_pipeline.pkl")
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr # used to build simple interface
2
  import numpy as np
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
 
6
  import json
7
 
8
  # custom functions
9
+ from cluster_utils import *
 
 
10
 
11
  exploration_pipeline = joblib.load("exploration_pipeline.pkl")
 
12
 
13
+ #========== HELPER FUNCTIONS ==========
14
def final_clustering(file, top_features):
    """Cluster an uploaded CSV and report the chosen k and top feature drivers.

    The CSV is transformed with the pipeline stored in ``core_pipeline.pkl``,
    the number of clusters is picked by ``choose_k``, KMeans is fitted on the
    transformed data, and the resulting centroids are projected back through
    the pipeline's PCA and scaler steps so the top features can be ranked.

    Args:
        file: Value delivered by the ``gr.File`` input; passed straight to
            ``pd.read_csv``.
        top_features: Number of top features to report. Coerced to ``int``
            because a slider component may deliver a float.

    Returns:
        A ``(best_k, top_drivers)`` tuple: the result of ``choose_k`` and the
        result of ``identify_top_drivers``.

    Raises:
        gr.Error: If no file has been uploaded.
    """
    # Clicking the button without an upload hands us None; surface a readable
    # message in the UI instead of a pandas traceback.
    if file is None:
        raise gr.Error("Please upload a CSV file before running the clustering.")

    df = pd.read_csv(file)
    core_pipeline = joblib.load("core_pipeline.pkl")

    X_pca = core_pipeline.transform(df)

    # dynamic `k` selection
    # choose_k() is from cluster_utils
    best_k = choose_k(X_pca)

    # NOTE(review): KMeans has no explicit import in this file; presumably it
    # arrives through `from cluster_utils import *` -- confirm.
    kmeans = KMeans(
        n_clusters=best_k,
        random_state=42,
        n_init="auto",
    )
    labels = kmeans.fit_predict(X_pca)

    # Cluster Analysis
    # The fitted steps live on the loaded pipeline object, and the attribute
    # is `named_steps` (the original read `pipeline.names_steps`, which names
    # neither an existing variable nor an existing attribute).
    pca = core_pipeline.named_steps["pca"]
    scaler = core_pipeline.named_steps["scaler"]
    feature_names = df.columns.tolist()

    # NOTE(review): an earlier revision imported `compute_cluster_centroids_pca`
    # from cluster_utils; verify that `compute_cluster_centroids` is the name
    # cluster_utils actually exports.
    centroids = compute_cluster_centroids(X_pca, labels)  # function is from cluster_utils
    original_centroids = inverse_project_centroids(
        centroids,
        pca,
        scaler,
        feature_names,
    )
    top_drivers = identify_top_drivers(original_centroids, int(top_features))

    return best_k, top_drivers
45
+
46
+ #========== GRADIO INTERFACE ==========
47
# Build the UI: a single "Final Clustering" tab where the user uploads a CSV,
# picks how many top features to show, and runs final_clustering() on click.
with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:

    # Component classes are capitalised: gr.Markdown, not gr.markdown.
    gr.Markdown("# 🧠 Worker Profiling & Cluster Analysis")

    with gr.Tab("Final Clustering"):
        file_input_final = gr.File(label="Upload CSV")
        # step=1 keeps the slider on whole numbers, since the value is used
        # as a count of features.
        top_features = gr.Slider(
            3,
            10,
            value=5,
            step=1,
            label="Show Top `n` Features",
        )

        run_btn = gr.Button("Run Final Clustering")
        best_k_out = gr.Textbox(label="Selected K")
        drivers_out = gr.JSON(label="Top Feature Drivers per Cluster")

        # final_clustering returns (best_k, top_drivers), matching the order
        # of the two output components.
        run_btn.click(
            final_clustering,
            inputs=[file_input_final, top_features],
            outputs=[best_k_out, drivers_out],
        )

app.launch()