Update src/streamlit_app.py
Browse files - src/streamlit_app.py +30 -0
src/streamlit_app.py
CHANGED
|
@@ -391,6 +391,7 @@ with tabs[0]:
|
|
| 391 |
)
|
| 392 |
st.markdown(f"Total features loaded: **{df.shape[1]}** | Rows: **{df.shape[0]}**")
|
| 393 |
|
|
|
|
| 394 |
# ----- Visualization tab
|
| 395 |
with tabs[1]:
|
| 396 |
st.subheader("Feature Visualization")
|
|
@@ -414,6 +415,35 @@ with tabs[1]:
|
|
| 414 |
ax2.set_title("Operating Mode Clusters (PCA Projection)")
|
| 415 |
st.pyplot(fig2, clear_figure=True)
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
# ----- Correlations tab
|
| 418 |
with tabs[2]:
|
| 419 |
st.subheader("Correlation explorer")
|
|
|
|
| 391 |
)
|
| 392 |
st.markdown(f"Total features loaded: **{df.shape[1]}** | Rows: **{df.shape[0]}**")
|
| 393 |
|
| 394 |
+
|
| 395 |
# ----- Visualization tab
|
| 396 |
with tabs[1]:
|
| 397 |
st.subheader("Feature Visualization")
|
|
|
|
| 415 |
ax2.set_title("Operating Mode Clusters (PCA Projection)")
|
| 416 |
st.pyplot(fig2, clear_figure=True)
|
| 417 |
|
| 418 |
+
# --- PCA Explanation ---
|
| 419 |
+
st.markdown("""
|
| 420 |
+
**Interpretation – Operating Mode Clusters**
|
| 421 |
+
|
| 422 |
+
This PCA-based projection compresses over 100 process features into two principal dimensions,
|
| 423 |
+
revealing the dominant patterns in furnace operation. Each color represents an automatically discovered
|
| 424 |
+
*operating mode* (via K-Means clustering).
|
| 425 |
+
|
| 426 |
+
- **Distinct clusters (colors)** → different operating regimes (e.g., high-power melt, refining, tapping, idle)
|
| 427 |
+
- **Overlaps** → transitional phases or process variability
|
| 428 |
+
- **Compact clusters** → stable operation; **spread-out clusters** → drift or unstable control
|
| 429 |
+
- **Shifts between colors** over time may reflect raw-material change or arc power adjustment
|
| 430 |
+
|
| 431 |
+
Understanding these clusters helps metallurgists and control engineers associate process signatures
|
| 432 |
+
with efficient or energy-intensive operating conditions.
|
| 433 |
+
""")
|
| 434 |
+
|
| 435 |
+
# --- Dynamic insight: the five features with the largest absolute loadings on PC1 and on PC2 ---
|
| 436 |
+
from sklearn.decomposition import PCA
|
| 437 |
+
num_df = df.select_dtypes(include=[np.number]).fillna(0)
|
| 438 |
+
pca = PCA(n_components=2, random_state=42)
|
| 439 |
+
pca.fit(num_df)
|
| 440 |
+
comp_df = pd.DataFrame(pca.components_.T, index=num_df.columns, columns=["PC1", "PC2"])
|
| 441 |
+
top_pc1 = comp_df["PC1"].abs().nlargest(5).index.tolist()
|
| 442 |
+
top_pc2 = comp_df["PC2"].abs().nlargest(5).index.tolist()
|
| 443 |
+
st.info(f"**Top variables driving PCA-1 (X-axis):** {', '.join(top_pc1)}")
|
| 444 |
+
st.info(f"**Top variables driving PCA-2 (Y-axis):** {', '.join(top_pc2)}")
|
| 445 |
+
|
| 446 |
+
|
| 447 |
# ----- Correlations tab
|
| 448 |
with tabs[2]:
|
| 449 |
st.subheader("Correlation explorer")
|