Spaces:

sdbrgo
/

PERCEUL

Sleeping

App Files Community

sdbrgo committed on Dec 18, 2025

Commit

864f147

verified ·

1 Parent(s): db9c325

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -61

app.py CHANGED Viewed

@@ -7,14 +7,11 @@ import json
 import traceback
 from preprocessing.numericselector import NumericSelector
-from sklearn.cluster import KMeans
 # custom functions
 from clustering.cluster_utils import *
-# exploration_pipeline = joblib.load("exploration_pipeline.pkl")
 #========== HELPER FUNCTIONS ==========
 def format_deviations_as_columns(drivers):
     headers = []
@@ -36,66 +33,39 @@ def format_deviations_as_columns(drivers):
     return table
-def final_clustering(file, top_features):
-    try:
-        df = pd.read_csv(file)
-        core_pipeline = joblib.load("preprocessing/core_pipeline.pkl")
-        # debug
-        print("📥 File:", file)
-        print("🔢 Top N:", top_features, type(top_features))
-        X_pca = core_pipeline.fit_transform(df)
-        # debug
-        if not hasattr(core_pipeline.named_steps["numeric_selector"], "numeric_cols_"):
-            raise RuntimeError("Pipeline was not fitted before transform")
-        # dynamic `k` selection
-        # choose_k() is from cluster_utils
-        best_k = choose_k(X_pca)
-        kmeans = KMeans(
-            n_clusters=best_k,
-            random_state=42,
-            n_init="auto"
-        )
-        labels = kmeans.fit_predict(X_pca)
-        # Cluster Analysis
-        pca = core_pipeline.named_steps["pca"]
-        scaler = core_pipeline.named_steps["scaler"]
-        feature_names = df.columns.tolist()
-        centroids = compute_cluster_centroids_pca(X_pca, labels) # function is from cluster_utils
-        original_centroids = inverse_project_centroids(
-            centroids,
-            pca,
-            scaler,
-            feature_names
-        )
-        top_drivers = identify_top_drivers(original_centroids, top_features)
-        deviations_markdown = format_deviations_as_columns(top_drivers)
-        # debug
-        print("✅ Best K:", best_k, type(best_k))
-        print("📊 Drivers sample:", top_drivers)
-        return best_k, deviations_markdown
-    except Exception as e:
-        print("🔥 ERROR IN final_clustering 🔥")
-        traceback.print_exc()
-        return "ERROR", {"error": str(e)}
 #========== GRADIO INTERFACE ==========
 with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
     gr.Markdown("# 🧠 PERCEUL: Profiler of Perception and Cognitive Ergonomics in the Workplace")
     with gr.Tab("Final Clustering"):
-        file_input_final = gr.File(label="Upload CSV")
         top_features = gr.Number(
             value=5,
             label="Number of Features to Display",
@@ -104,15 +74,15 @@ with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
             step=1,
             precision=0
         )
         run_btn = gr.Button("Run Final Clustering")
         best_k_out = gr.Number(label="Selected K", interactive=False, precision=0)
         gr.Markdown("### Cluster Characteristics")
         deviations_out = gr.Markdown()
         run_btn.click(
             final_clustering,
-            inputs=[file_input_final, top_features],
             outputs=[best_k_out, deviations_out]
         )

 import traceback
 from preprocessing.numericselector import NumericSelector
+from cluster_ops.clustering import explore_clusters, final_clustering
 # custom functions
 from clustering.cluster_utils import *
 #========== HELPER FUNCTIONS ==========
 def format_deviations_as_columns(drivers):
     headers = []
     return table
 #========== GRADIO INTERFACE ==========
 with gr.Blocks(title="PERCEUL: Perception-Based Worker Profiler") as app:
     gr.Markdown("# 🧠 PERCEUL: Profiler of Perception and Cognitive Ergonomics in the Workplace")
+    file_input = gr.File(label="Upload CSV")
+    with gr.Tab("Cluster Exploration"):
+        perp = gr.Number(
+            value=30,
+            label="Perplexity",
+            minimum=1,
+            maximum=50,
+            step=1,
+            precision=0
+        )
+        learn_rate = gr.Number(
+            value=200,
+            label="Learning Rate",
+            minimum=1,
+            maximum=1000,
+            step=10,
+            precision=0
+        )
+        btn = gr.Button("Explore Clusters")
+        plot_output = gr.Plot()
+        btn.click(
+            fn=explore_clusters,
+            inputs=[file_input, perp, learn_rate],
+            outputs=plot_output
+        )
     with gr.Tab("Final Clustering"):
         top_features = gr.Number(
             value=5,
             label="Number of Features to Display",
             step=1,
             precision=0
         )
         run_btn = gr.Button("Run Final Clustering")
         best_k_out = gr.Number(label="Selected K", interactive=False, precision=0)
         gr.Markdown("### Cluster Characteristics")
         deviations_out = gr.Markdown()
         run_btn.click(
             final_clustering,
+            inputs=[file_input, top_features],
             outputs=[best_k_out, deviations_out]
         )