Spaces:

chiichann
/

anomaly_detection_app

Running

App Files Files Community

anomaly_detection_app / app.py

chiichann

Upload 3 files

f3075f7 verified 10 months ago

raw

history blame contribute delete

2.65 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.ensemble import IsolationForest
	from sklearn.preprocessing import StandardScaler

	# Page heading shown at the top of the app.
	st.title("📈 Anomaly Detection Tool")

	# Create the three top-level tabs; each label is the text shown on its tab.
	tab_labels = ["📖 About", "📊 Dataset Overview", "🚨 Anomaly Detection"]
	tab1, tab2, tab3 = st.tabs(tab_labels)

	# About tab: static markdown describing what the app does and its steps.
	# The st.write call is nested under `with tab1:` so that it is a valid
	# block body and is rendered inside the About tab.
	with tab1:
	    st.write("""
	This app detects anomalies in time-series data using the Isolation Forest algorithm.
	Users can visualize detected anomalies.

	### How It Works:
	- Step 1: Load a dataset (CSV format from the Numenta Anomaly Benchmark `realKnownCause` dataset)
	- Step 2: Standardize numerical values for better anomaly detection
	- Step 3: Apply Isolation Forest to identify outliers
	- Step 4: Visualize the detected anomalies in a time-series plot
	""")

	# Read the time-series CSV into a DataFrame. The path is relative, so it is
	# resolved against the process's current working directory.
	file_path = "ambient_temperature_system_failure.csv"
	df = pd.read_csv(filepath_or_buffer=file_path)

	# Dataset Overview tab: show the first raw rows, then the cleaned frame
	# indexed by timestamp. The body is nested under `with tab2:` so that it is
	# a valid block and its output is rendered inside this tab.
	with tab2:
	    st.write("### Dataset Overview")
	    st.write(df.head())

	    # Parse the timestamp column; values that cannot be parsed become NaT
	    # (errors='coerce'). Rows with a missing timestamp are dropped and the
	    # column then becomes the index.
	    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
	    df = df.dropna(subset=['timestamp']).set_index('timestamp')

	    st.write("### Processed Dataset")
	    st.write(df.head())

	# Anomaly Detection tab: standardize the `value` column, fit an Isolation
	# Forest on it, flag points by anomaly-score threshold, and plot the series
	# with the flagged points highlighted. The body is nested under `with tab3:`
	# so that it is a valid block and its widgets/plot render inside this tab.
	with tab3:
	    st.write("### Detect Anomalies in the Data")

	    # Standardize the data
	    scaler = StandardScaler()
	    df['scaled_value'] = scaler.fit_transform(df[['value']])
	    features = df[['scaled_value']]

	    # Fit the Isolation Forest. Only fitting is needed here: the labels are
	    # derived from the score threshold below, so a separate fit_predict
	    # result would be overwritten and is not computed.
	    contamination_level = st.slider("Select Contamination Level", 0.01, 0.1, 0.05, 0.01)
	    model = IsolationForest(contamination=contamination_level, random_state=42)
	    model.fit(features)

	    # Flag every point whose anomaly score is below the user-chosen
	    # threshold. Per the scikit-learn docs, negative decision_function
	    # scores are the model's outliers, so the default threshold of 0.0
	    # reproduces the model's own labelling.
	    threshold = st.slider("Set Anomaly Score Threshold", -1.0, 1.0, 0.0, 0.01)
	    df["anomaly_score"] = model.decision_function(features)
	    df["anomaly"] = df["anomaly_score"] < threshold

	    # Plot results: full series as a line, flagged points as red markers.
	    flagged = df[df["anomaly"]]
	    fig, ax = plt.subplots(figsize=(12, 6))
	    ax.plot(df.index, df['value'], label='Value', color='blue')
	    ax.scatter(flagged.index, flagged['value'], color='red', label='Anomaly', marker='o')
	    ax.set_xlabel('Timestamp')
	    ax.set_ylabel('Value')
	    ax.set_title('Anomaly Detection in Time-Series Data')
	    ax.legend()
	    st.pyplot(fig)

	    # Close the figure once rendered. Streamlit reruns the script on widget
	    # interaction, and pyplot keeps every figure created via plt.subplots
	    # alive until it is closed explicitly.
	    plt.close(fig)