Spaces:

Markndrei
/

Accidental-Analysis

Sleeping

App Files Files Community

Accidental-Analysis / app.py

Markndrei

uploading application files

0915a9a 11 months ago

raw

history blame contribute delete

3.93 kB

	import streamlit as st
	import pandas as pd
	import joblib
	import matplotlib.pyplot as plt
	import numpy as np
	from sklearn.preprocessing import StandardScaler
	from sklearn.cluster import KMeans
	from sklearn.ensemble import IsolationForest
	from sklearn.metrics import silhouette_score

	# Load Models
	# Deserialize the two persisted estimators from the working directory.
	# The generator is consumed left to right, so the K-Means file is read
	# before the Isolation Forest file.
	kmeans, isolation_forest = (
	    joblib.load(model_path)
	    for model_path in ("kmeans_model.pkl", "isolation_forest_model.pkl")
	)

	# Streamlit UI
	# Page-level settings first, then the heading and tagline rendered above the tabs.
	st.set_page_config(
	    layout="wide",
	    page_icon="📊",
	    page_title="Anomaly & Clustering Analysis",
	)
	st.title("📌 Anomaly & Clustering Analysis")
	st.markdown(
	    "### A simple interactive app to analyze anomalies and clusters in generated data using Isolation Forest and K-Means clustering."
	)

	# Load dataset
	df = pd.read_csv("dataset.csv")

	# Ensure data is numerical.
	# Coerce BEFORE dropping missing rows: errors="coerce" turns every value it
	# cannot parse into NaN, so a dropna() that runs first would let those NaNs
	# through to the clustering and silhouette steps below.
	df["Accidental Deaths"] = pd.to_numeric(df["Accidental Deaths"], errors="coerce")
	df.dropna(inplace=True)

	# Standardize Data
	# The scaler is fitted on a one-column DataFrame, so it records the feature
	# name "Accidental Deaths"; scaled_data itself is a plain 2-D NumPy array.
	scaler = StandardScaler()
	scaled_data = scaler.fit_transform(df[["Accidental Deaths"]])

	# Determine optimal K
	# Candidate cluster counts start at 2 and normally run up to 9, but two
	# library constraints cap the upper end on small or degenerate datasets:
	#   * silhouette_score needs 2 <= n_labels <= n_samples - 1
	#   * asking K-Means for more clusters than there are distinct points
	#     cannot yield that many non-empty clusters
	max_k = min(9, len(scaled_data) - 1, len(np.unique(scaled_data)))
	if max_k < 2:
	    st.error("Not enough data to cluster: need at least 3 rows and 2 distinct 'Accidental Deaths' values.")
	    st.stop()  # halts this script run; nothing below can work without clusters

	silhouette_scores = []
	k_values = range(2, max_k + 1)
	for k in k_values:
	    kmeans_temp = KMeans(n_clusters=k, random_state=42, n_init=10)
	    labels = kmeans_temp.fit_predict(scaled_data)
	    score = silhouette_score(scaled_data, labels)
	    silhouette_scores.append(score)
	# The k with the highest silhouette score wins; ties resolve to the smallest k
	# because argmax returns the first maximum.
	optimal_k = k_values[int(np.argmax(silhouette_scores))]

	# Train final K-Means with optimal K
	kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
	df["Cluster"] = kmeans_final.fit_predict(scaled_data)

	# Tabs
	tabs = st.tabs(["📊 Data Preview", "📈 Insights", "🧪 User Testing"])

	# Tab 0: a collapsible sample of the dataset plus the number of rows per cluster.
	with tabs[0]:
	    st.markdown("## 📊 Data Preview")
	    with st.expander("📝 View Dataset Sample"):
	        st.dataframe(df.head())

	    st.markdown("### 🔍 Cluster Distribution")
	    unique, counts = np.unique(df["Cluster"], return_counts=True)
	    # Index the frame by cluster id: st.bar_chart draws every column as a
	    # series against the index, so leaving "Cluster" as a data column would
	    # plot the ids themselves as a second set of bars next to the counts.
	    st.bar_chart(pd.DataFrame({"Cluster": unique, "Count": counts}).set_index("Cluster"))

	# Tab 1: scatter of the series coloured by cluster, over a dashed trace of all rows.
	with tabs[1]:
	    st.markdown("## 📈 Insights & Visualizations")

	    fig, ax = plt.subplots(figsize=(10, 5))
	    for cluster in np.unique(df["Cluster"]):
	        subset = df[df["Cluster"] == cluster]
	        ax.scatter(subset.index, subset["Accidental Deaths"], label=f"Cluster {cluster}")
	    # One faint dashed line through every row, drawn once after the per-cluster points.
	    ax.plot(df.index, df["Accidental Deaths"], color="gray", linestyle="dashed", alpha=0.5)
	    ax.set_title("Clustered Data")
	    ax.set_xlabel("Index")
	    ax.set_ylabel("Accidental Deaths")
	    ax.legend()
	    st.pyplot(fig)
	    # pyplot keeps every figure it creates alive until it is closed, and this
	    # script is re-executed on each interaction, so release the figure once
	    # it has been rendered.
	    plt.close(fig)

	# Tab 2: classify a user-supplied value and show it against the clustered data.
	with tabs[2]:
	    st.markdown("## 🧪 User Testing")
	    st.markdown(f"### Optimal K Selected: {optimal_k}")

	    user_input = st.number_input("Enter a new Accidental Deaths value:", min_value=0, step=1)

	    if st.button("🔍 Analyze Input"):
	        # Wrap the value in a DataFrame carrying the column name the scaler
	        # was fitted on; a bare ndarray makes scikit-learn warn that the
	        # input has no valid feature names. The transform result is still a
	        # plain array, which is what K-Means was fitted on.
	        input_scaled = scaler.transform(pd.DataFrame({"Accidental Deaths": [user_input]}))
	        cluster_prediction = kmeans_final.predict(input_scaled)[0]
	        # NOTE(review): assumes the persisted Isolation Forest was trained on
	        # data scaled the same way — confirm against the training script.
	        anomaly_prediction = isolation_forest.predict(input_scaled)[0]
	        # predict() returns 1 for inliers; anything else is reported as anomalous.
	        anomaly_status = "🟢 Normal" if anomaly_prediction == 1 else "🔴 Anomalous"

	        st.write(f"Cluster Assigned: {cluster_prediction}")
	        st.write(f"Anomaly Status: {anomaly_status}")
	        st.write(f"### Anomaly Prediction: {anomaly_status}")

	    st.markdown("### 📊 Updated Cluster Visualization")
	    fig, ax = plt.subplots(figsize=(10, 5))
	    for cluster in np.unique(df["Cluster"]):
	        subset = df[df["Cluster"] == cluster]
	        ax.scatter(subset.index, subset["Accidental Deaths"], label=f"Cluster {cluster}")
	    ax.plot(df.index, df["Accidental Deaths"], color="gray", linestyle="dashed", alpha=0.5)

	    # Highlight user input
	    # user_input is always bound by the number_input call above, so the marker
	    # is drawn unconditionally (the former `'user_input' in locals()` guard
	    # could never be false). It is placed at x = len(df).
	    ax.scatter(len(df), user_input, color='red', marker='x', s=100, label="User Input")

	    ax.set_title("Updated Clustering Graph")
	    ax.set_xlabel("Index")
	    ax.set_ylabel("Accidental Deaths")
	    ax.legend()
	    st.pyplot(fig)
	    # Release the figure after rendering; the script reruns on every
	    # interaction and pyplot would otherwise keep each figure alive.
	    plt.close(fig)

	    if st.button("🔄 Refresh Data"):
	        st.rerun()