Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import joblib | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.cluster import KMeans | |
| from sklearn.ensemble import IsolationForest | |
| from sklearn.metrics import silhouette_score | |
# Load the persisted estimators from the working directory.
# NOTE: joblib.load unpickles its input, so these files must come from a
# trusted source. `kmeans` is not referenced again in the visible part of
# this script; it is still loaded so the module-level name stays available.
kmeans, isolation_forest = (
    joblib.load(model_path)
    for model_path in ("kmeans_model.pkl", "isolation_forest_model.pkl")
)
# Streamlit UI: page-level configuration is issued first, followed by the
# visible heading and the one-line description of the app.
page_settings = {
    "page_title": "Anomaly & Clustering Analysis",
    "page_icon": "π",
    "layout": "wide",
}
st.set_page_config(**page_settings)

st.title("π Anomaly & Clustering Analysis")
st.markdown(
    "### A simple interactive app to analyze anomalies and clusters in generated data using Isolation Forest and K-Means clustering."
)
# Load dataset
df = pd.read_csv("dataset.csv")

# Ensure data is numerical *before* discarding incomplete rows.
# With errors="coerce" every unparseable entry becomes NaN; dropping rows only
# afterwards guarantees that those NaNs are removed as well, instead of being
# carried into the scaler and the clustering steps that consume this column.
df["Accidental Deaths"] = pd.to_numeric(df["Accidental Deaths"], errors="coerce")
df.dropna(inplace=True)

# Standardize Data: the scaler is fitted on the single feature column (kept as
# a one-column DataFrame) and reused later to transform user-supplied values.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[["Accidental Deaths"]])
# Determine optimal K: every candidate cluster count is scored with the
# silhouette coefficient on the scaled data and the best-scoring one wins
# (np.argmax keeps the first candidate when scores tie).
k_values = range(2, 10)


def _silhouette_for_k(n_clusters):
    """Fit K-Means with ``n_clusters`` on the scaled data and return its silhouette score."""
    candidate = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    assignments = candidate.fit_predict(scaled_data)
    return silhouette_score(scaled_data, assignments)


silhouette_scores = [_silhouette_for_k(k) for k in k_values]
best_position = np.argmax(silhouette_scores)
optimal_k = k_values[best_position]

# Train final K-Means with optimal K and store each row's cluster label.
kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
df["Cluster"] = kmeans_final.fit_predict(scaled_data)
# Tabs: one container per section of the app; later sections index into `tabs`.
tabs = st.tabs(["π Data Preview", "π Insights", "π§ͺ User Testing"])

with tabs[0]:
    st.markdown("## π Data Preview")
    with st.expander("π View Dataset Sample"):
        st.dataframe(df.head())

    st.markdown("### π Cluster Distribution")
    unique, counts = np.unique(df["Cluster"], return_counts=True)
    # Index the frame by cluster label so the chart shows one bar per cluster
    # whose height is the member count. Without the index, "Cluster" would be
    # charted as a second data series next to "Count".
    cluster_counts = pd.DataFrame({"Cluster": unique, "Count": counts}).set_index("Cluster")
    st.bar_chart(cluster_counts)
with tabs[1]:
    st.markdown("## π Insights & Visualizations")

    fig, ax = plt.subplots(figsize=(10, 5))
    # One scatter series per cluster label, plotted against the row index.
    for cluster in np.unique(df["Cluster"]):
        subset = df[df["Cluster"] == cluster]
        ax.scatter(subset.index, subset["Accidental Deaths"], label=f"Cluster {cluster}")
    # Faint dashed line through all points to show the overall sequence.
    ax.plot(df.index, df["Accidental Deaths"], color="gray", linestyle="dashed", alpha=0.5)
    ax.set_title("Clustered Data")
    ax.set_xlabel("Index")
    ax.set_ylabel("Accidental Deaths")
    ax.legend()
    st.pyplot(fig)
    # Figures created through pyplot stay registered until closed; this script
    # is re-executed on every interaction, so release the figure once rendered.
    plt.close(fig)
with tabs[2]:
    st.markdown("## π§ͺ User Testing")
    st.markdown(f"### Optimal K Selected: **{optimal_k}**")
    user_input = st.number_input("Enter a new Accidental Deaths value:", min_value=0, step=1)

    if st.button("π Analyze Input"):
        # The scaler was fitted on a DataFrame with the "Accidental Deaths"
        # column, so hand it the same named column rather than a bare array;
        # this keeps the input consistent with the fitted feature name.
        input_frame = pd.DataFrame({"Accidental Deaths": [user_input]})
        input_scaled = scaler.transform(input_frame)
        cluster_prediction = kmeans_final.predict(input_scaled)[0]
        # NOTE(review): assumes the persisted Isolation Forest was trained on
        # data scaled the same way as here — confirm against the training code.
        anomaly_prediction = isolation_forest.predict(input_scaled)[0]
        # A prediction of 1 is reported as normal; any other value as anomalous.
        anomaly_status = "π’ Normal" if anomaly_prediction == 1 else "π΄ Anomalous"
        st.write(f"**Cluster Assigned:** {cluster_prediction}")
        st.write(f"**Anomaly Status:** {anomaly_status}")
        st.write(f"### **Anomaly Prediction: {anomaly_status}**")

    st.markdown("### π Updated Cluster Visualization")
    fig, ax = plt.subplots(figsize=(10, 5))
    # One scatter series per cluster label, plotted against the row index.
    for cluster in np.unique(df["Cluster"]):
        subset = df[df["Cluster"] == cluster]
        ax.scatter(subset.index, subset["Accidental Deaths"], label=f"Cluster {cluster}")
    ax.plot(df.index, df["Accidental Deaths"], color="gray", linestyle="dashed", alpha=0.5)
    # Highlight user input. `user_input` is always assigned by the number
    # widget above, so the marker is drawn unconditionally at x = len(df).
    ax.scatter(len(df), user_input, color="red", marker="x", s=100, label="User Input")
    ax.set_title("Updated Clustering Graph")
    ax.set_xlabel("Index")
    ax.set_ylabel("Accidental Deaths")
    ax.legend()
    st.pyplot(fig)
    # Release the pyplot-managed figure; the script reruns on every interaction
    # and unclosed figures would otherwise accumulate.
    plt.close(fig)

    if st.button("π Refresh Data"):
        st.rerun()