singhn9 committed on
Commit
bbdc422
·
verified ·
1 Parent(s): ab9c160

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +30 -0
src/streamlit_app.py CHANGED
@@ -391,6 +391,7 @@ with tabs[0]:
391
  )
392
  st.markdown(f"Total features loaded: **{df.shape[1]}** | Rows: **{df.shape[0]}**")
393
 
 
394
  # ----- Visualization tab
395
  with tabs[1]:
396
  st.subheader("Feature Visualization")
@@ -414,6 +415,35 @@ with tabs[1]:
414
  ax2.set_title("Operating Mode Clusters (PCA Projection)")
415
  st.pyplot(fig2, clear_figure=True)
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  # ----- Correlations tab
418
  with tabs[2]:
419
  st.subheader("Correlation explorer")
 
391
  )
392
  st.markdown(f"Total features loaded: **{df.shape[1]}** | Rows: **{df.shape[0]}**")
393
 
394
+
395
  # ----- Visualization tab
396
  with tabs[1]:
397
  st.subheader("Feature Visualization")
 
415
  ax2.set_title("Operating Mode Clusters (PCA Projection)")
416
  st.pyplot(fig2, clear_figure=True)
417
 
418
+ # --- PCA Explanation ---
419
+ st.markdown("""
420
+ **Interpretation – Operating Mode Clusters**
421
+
422
+ This PCA-based projection compresses over 100 process features into two principal dimensions,
423
+ revealing the dominant patterns in furnace operation. Each color represents an automatically discovered
424
+ *operating mode* (via K-Means clustering).
425
+
426
+ - **Distinct clusters (colors)** → different operating regimes (e.g., high-power melt, refining, tapping, idle)
427
+ - **Overlaps** → transitional phases or process variability
428
+ - **Compact clusters** → stable operation; **spread-out clusters** → drift or unstable control
429
+ - **Shifts between colors** over time may reflect raw-material change or arc power adjustment
430
+
431
+ Understanding these clusters helps metallurgists and control engineers associate process signatures
432
+ with efficient or energy-intensive operating conditions.
433
+ """)
434
+
435
+ # --- Dynamic insight: which features drive PCA the most ---
436
+ from sklearn.decomposition import PCA
437
+ num_df = df.select_dtypes(include=[np.number]).fillna(0)
438
+ pca = PCA(n_components=2, random_state=42)
439
+ pca.fit(num_df)
440
+ comp_df = pd.DataFrame(pca.components_.T, index=num_df.columns, columns=["PC1", "PC2"])
441
+ top_pc1 = comp_df["PC1"].abs().nlargest(5).index.tolist()
442
+ top_pc2 = comp_df["PC2"].abs().nlargest(5).index.tolist()
443
+ st.info(f"**Top variables driving PCA-1 (X-axis):** {', '.join(top_pc1)}")
444
+ st.info(f"**Top variables driving PCA-2 (Y-axis):** {', '.join(top_pc2)}")
445
+
446
+
447
  # ----- Correlations tab
448
  with tabs[2]:
449
  st.subheader("Correlation explorer")