sdbrgo committed on
Commit
864f147
·
verified ·
1 Parent(s): db9c325

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -61
app.py CHANGED
@@ -7,14 +7,11 @@ import json
7
  import traceback
8
 
9
  from preprocessing.numericselector import NumericSelector
10
-
11
- from sklearn.cluster import KMeans
12
 
13
  # custom functions
14
  from clustering.cluster_utils import *
15
 
16
- # exploration_pipeline = joblib.load("exploration_pipeline.pkl")
17
-
18
  #========== HELPER FUNCTIONS ==========
19
  def format_deviations_as_columns(drivers):
20
  headers = []
@@ -36,66 +33,39 @@ def format_deviations_as_columns(drivers):
36
 
37
  return table
38
 
39
-
40
- def final_clustering(file, top_features):
41
- try:
42
- df = pd.read_csv(file)
43
- core_pipeline = joblib.load("preprocessing/core_pipeline.pkl")
44
-
45
- # debug
46
- print("📥 File:", file)
47
- print("🔢 Top N:", top_features, type(top_features))
48
-
49
- X_pca = core_pipeline.fit_transform(df)
50
-
51
- # debug
52
- if not hasattr(core_pipeline.named_steps["numeric_selector"], "numeric_cols_"):
53
- raise RuntimeError("Pipeline was not fitted before transform")
54
-
55
- # dynamic `k` selection
56
- # choose_k() is from cluster_utils
57
- best_k = choose_k(X_pca)
58
-
59
- kmeans = KMeans(
60
- n_clusters=best_k,
61
- random_state=42,
62
- n_init="auto"
63
- )
64
- labels = kmeans.fit_predict(X_pca)
65
-
66
- # Cluster Analysis
67
- pca = core_pipeline.named_steps["pca"]
68
- scaler = core_pipeline.named_steps["scaler"]
69
- feature_names = df.columns.tolist()
70
- centroids = compute_cluster_centroids_pca(X_pca, labels) # function is from cluster_utils
71
- original_centroids = inverse_project_centroids(
72
- centroids,
73
- pca,
74
- scaler,
75
- feature_names
76
- )
77
- top_drivers = identify_top_drivers(original_centroids, top_features)
78
- deviations_markdown = format_deviations_as_columns(top_drivers)
79
-
80
- # debug
81
- print("✅ Best K:", best_k, type(best_k))
82
- print("📊 Drivers sample:", top_drivers)
83
-
84
- return best_k, deviations_markdown
85
-
86
- except Exception as e:
87
- print("🔥 ERROR IN final_clustering 🔥")
88
- traceback.print_exc()
89
-
90
- return "ERROR", {"error": str(e)}
91
-
92
  #========== GRADIO INTERFACE ==========
93
  with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
94
 
95
  gr.Markdown("# 🧠 PERCEUL: Profiler of Perception and Cognitive Ergonomics in the Workplace")
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  with gr.Tab("Final Clustering"):
98
- file_input_final = gr.File(label="Upload CSV")
99
  top_features = gr.Number(
100
  value=5,
101
  label="Number of Features to Display",
@@ -104,15 +74,15 @@ with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
104
  step=1,
105
  precision=0
106
  )
107
-
108
  run_btn = gr.Button("Run Final Clustering")
109
  best_k_out = gr.Number(label="Selected K", interactive=False, precision=0)
110
  gr.Markdown("### Cluster Characteristics")
111
  deviations_out = gr.Markdown()
112
-
113
  run_btn.click(
114
  final_clustering,
115
- inputs=[file_input_final, top_features],
116
  outputs=[best_k_out, deviations_out]
117
  )
118
 
 
7
  import traceback
8
 
9
  from preprocessing.numericselector import NumericSelector
10
+ from cluster_ops.clustering import explore_clusters, final_clustering
 
11
 
12
  # custom functions
13
  from clustering.cluster_utils import *
14
 
 
 
15
  #========== HELPER FUNCTIONS ==========
16
  def format_deviations_as_columns(drivers):
17
  headers = []
 
33
 
34
  return table
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  #========== GRADIO INTERFACE ==========
37
  with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
38
 
39
  gr.Markdown("# 🧠 PERCEUL: Profiler of Perception and Cognitive Ergonomics in the Workplace")
40
 
41
+ file_input = gr.File(label="Upload CSV")
42
+ with gr.Tab("Cluster Exploration"):
43
+ perp = gr.Number(
44
+ value=30,
45
+ label="Perplexity",
46
+ minimum=1,
47
+ maximum=50,
48
+ step=1,
49
+ precision=0
50
+ )
51
+ learn_rate = gr.Number(
52
+ value=200,
53
+ label="Learning Rate",
54
+ minimum=1,
55
+ maximum=1000,
56
+ step=10,
57
+ precision=0
58
+ )
59
+ btn = gr.Button("Explore Clusters")
60
+ plot_output = gr.Plot()
61
+
62
+ btn.click(
63
+ fn=explore_clusters,
64
+ inputs=[file_input, perp, learn_rate],
65
+ outputs=plot_output
66
+ )
67
+
68
  with gr.Tab("Final Clustering"):
 
69
  top_features = gr.Number(
70
  value=5,
71
  label="Number of Features to Display",
 
74
  step=1,
75
  precision=0
76
  )
77
+
78
  run_btn = gr.Button("Run Final Clustering")
79
  best_k_out = gr.Number(label="Selected K", interactive=False, precision=0)
80
  gr.Markdown("### Cluster Characteristics")
81
  deviations_out = gr.Markdown()
82
+
83
  run_btn.click(
84
  final_clustering,
85
+ inputs=[file_input, top_features],
86
  outputs=[best_k_out, deviations_out]
87
  )
88