Markndrei committed on
Commit
0915a9a
·
1 Parent(s): f976d9e

uploading application files

Browse files
Files changed (6) hide show
  1. app.py +105 -0
  2. dataset.csv +73 -0
  3. isolation_forest_model.pkl +3 -0
  4. kmeans_model.pkl +3 -0
  5. model.ipynb +0 -0
  6. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.metrics import silhouette_score

# ---------------------------------------------------------------------------
# Model loading
# ---------------------------------------------------------------------------
# NOTE(review): the pickled KMeans model is loaded but never used below — the
# app retrains K-Means from scratch on every rerun. Kept for backward
# compatibility; consider either using it or removing the artifact.
kmeans = joblib.load("kmeans_model.pkl")
isolation_forest = joblib.load("isolation_forest_model.pkl")

# ---------------------------------------------------------------------------
# Streamlit page setup
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Anomaly & Clustering Analysis", page_icon="📊", layout="wide")
st.title("📌 Anomaly & Clustering Analysis")
st.markdown("### A simple interactive app to analyze anomalies and clusters in generated data using Isolation Forest and K-Means clustering.")

# ---------------------------------------------------------------------------
# Data loading & cleaning
# ---------------------------------------------------------------------------
df = pd.read_csv("dataset.csv")

# BUGFIX: coerce to numeric BEFORE dropping NaNs. The original dropped NaNs
# first and coerced afterwards, so any non-numeric cell became NaN again and
# would crash StandardScaler downstream.
df["Accidental Deaths"] = pd.to_numeric(df["Accidental Deaths"], errors="coerce")
df.dropna(inplace=True)

# Standardize the single feature. Fitting on a DataFrame (not a bare array)
# lets the scaler remember the feature name, which avoids sklearn
# "X does not have valid feature names" warnings at transform time.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[["Accidental Deaths"]])

# ---------------------------------------------------------------------------
# Choose K by maximizing the silhouette score over K = 2..9
# ---------------------------------------------------------------------------
silhouette_scores = []
k_values = range(2, 10)
for k in k_values:
    kmeans_temp = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = kmeans_temp.fit_predict(scaled_data)
    silhouette_scores.append(silhouette_score(scaled_data, labels))
optimal_k = k_values[np.argmax(silhouette_scores)]

# Train the final K-Means with the best K and label every row.
kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
df["Cluster"] = kmeans_final.fit_predict(scaled_data)

# ---------------------------------------------------------------------------
# UI tabs
# ---------------------------------------------------------------------------
tabs = st.tabs(["📊 Data Preview", "📈 Insights", "🧪 User Testing"])

with tabs[0]:
    st.markdown("## 📊 Data Preview")
    with st.expander("📝 View Dataset Sample"):
        st.dataframe(df.head())

    st.markdown("### 🔍 Cluster Distribution")
    unique, counts = np.unique(df["Cluster"], return_counts=True)
    st.bar_chart(pd.DataFrame({"Cluster": unique, "Count": counts}))

with tabs[1]:
    st.markdown("## 📈 Insights & Visualizations")

    # Scatter each cluster in its own color over a faint line of the raw series.
    fig, ax = plt.subplots(figsize=(10, 5))
    for cluster in np.unique(df["Cluster"]):
        subset = df[df["Cluster"] == cluster]
        ax.scatter(subset.index, subset["Accidental Deaths"], label=f"Cluster {cluster}")
    ax.plot(df.index, df["Accidental Deaths"], color="gray", linestyle="dashed", alpha=0.5)
    ax.set_title("Clustered Data")
    ax.set_xlabel("Index")
    ax.set_ylabel("Accidental Deaths")
    ax.legend()
    st.pyplot(fig)

with tabs[2]:
    st.markdown("## 🧪 User Testing")
    st.markdown(f"### Optimal K Selected: **{optimal_k}**")

    user_input = st.number_input("Enter a new Accidental Deaths value:", min_value=0, step=1)

    if st.button("🔍 Analyze Input"):
        # Wrap the value in a one-row DataFrame with the training column name so
        # the fitted scaler sees matching feature names.
        input_scaled = scaler.transform(pd.DataFrame([[user_input]], columns=["Accidental Deaths"]))
        cluster_prediction = kmeans_final.predict(input_scaled)[0]
        # NOTE(review): assumes the pickled IsolationForest was trained on
        # standardized values — confirm against the training notebook.
        anomaly_prediction = isolation_forest.predict(input_scaled)[0]
        anomaly_status = "🟢 Normal" if anomaly_prediction == 1 else "🔴 Anomalous"

        st.write(f"**Cluster Assigned:** {cluster_prediction}")
        # BUGFIX: the status was previously rendered twice; show it once.
        st.write(f"### **Anomaly Prediction: {anomaly_status}**")

        st.markdown("### 📊 Updated Cluster Visualization")
        fig, ax = plt.subplots(figsize=(10, 5))
        for cluster in np.unique(df["Cluster"]):
            subset = df[df["Cluster"] == cluster]
            ax.scatter(subset.index, subset["Accidental Deaths"], label=f"Cluster {cluster}")
        ax.plot(df.index, df["Accidental Deaths"], color="gray", linestyle="dashed", alpha=0.5)

        # Highlight the user's value one position past the last data point.
        # (The original guarded this with `if 'user_input' in locals()`, which
        # was always true because st.number_input runs unconditionally.)
        ax.scatter(len(df), user_input, color="red", marker="x", s=100, label="User Input")

        ax.set_title("Updated Clustering Graph")
        ax.set_xlabel("Index")
        ax.set_ylabel("Accidental Deaths")
        ax.legend()
        st.pyplot(fig)

    if st.button("🔄 Refresh Data"):
        st.rerun()
dataset.csv ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Month,Accidental Deaths
2
+ 1973-01,9007
3
+ 1973-02,8106
4
+ 1973-03,8928
5
+ 1973-04,9137
6
+ 1973-05,10017
7
+ 1973-06,10826
8
+ 1973-07,11317
9
+ 1973-08,10744
10
+ 1973-09,9713
11
+ 1973-10,9938
12
+ 1973-11,9161
13
+ 1973-12,8927
14
+ 1974-01,7750
15
+ 1974-02,6981
16
+ 1974-03,8038
17
+ 1974-04,8422
18
+ 1974-05,8714
19
+ 1974-06,9512
20
+ 1974-07,10120
21
+ 1974-08,9823
22
+ 1974-09,8743
23
+ 1974-10,9129
24
+ 1974-11,8710
25
+ 1974-12,8680
26
+ 1975-01,8162
27
+ 1975-02,7306
28
+ 1975-03,8124
29
+ 1975-04,7870
30
+ 1975-05,9387
31
+ 1975-06,9556
32
+ 1975-07,10093
33
+ 1975-08,9620
34
+ 1975-09,8285
35
+ 1975-10,8433
36
+ 1975-11,8160
37
+ 1975-12,8034
38
+ 1976-01,7717
39
+ 1976-02,7461
40
+ 1976-03,7776
41
+ 1976-04,7925
42
+ 1976-05,8634
43
+ 1976-06,8945
44
+ 1976-07,10078
45
+ 1976-08,9179
46
+ 1976-09,8037
47
+ 1976-10,8488
48
+ 1976-11,7874
49
+ 1976-12,8647
50
+ 1977-01,7792
51
+ 1977-02,6957
52
+ 1977-03,7726
53
+ 1977-04,8106
54
+ 1977-05,8890
55
+ 1977-06,9299
56
+ 1977-07,10625
57
+ 1977-08,9302
58
+ 1977-09,8314
59
+ 1977-10,8850
60
+ 1977-11,8265
61
+ 1977-12,8796
62
+ 1978-01,7836
63
+ 1978-02,6892
64
+ 1978-03,7791
65
+ 1978-04,8129
66
+ 1978-05,9115
67
+ 1978-06,9434
68
+ 1978-07,10484
69
+ 1978-08,9827
70
+ 1978-09,9110
71
+ 1978-10,9070
72
+ 1978-11,8633
73
+ 1978-12,9240
isolation_forest_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fba1121133e60dbbc29e158cef747fa59d9dd96bea6ff2ac65a48232df0f3d7
3
+ size 1411305
kmeans_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00192b3fc5d36ba2eec69abb6122d4c1a5ba5d3626a21271391f9f57abcb85fc
3
+ size 1063
model.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ matplotlib
3
+ numpy
4
+ pandas
5
+ scikit-learn
+ joblib