Spaces:

Tzetha
/

Anomaly_Detection

Sleeping

App Files Files Community

Anomaly_Detection / app.py

Tzetha

Upload FIles

7a09bec verified 9 months ago

raw

history blame contribute delete

3.43 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.ensemble import IsolationForest
	from sklearn.preprocessing import StandardScaler

	# Browser-tab title and icon for the app
	_PAGE_SETTINGS = {"page_title": "Anomaly Detection App", "page_icon": "🔍"}
	st.set_page_config(**_PAGE_SETTINGS)

	# Stylesheet injected as raw HTML: colours, weight, radius and padding for the
	# regular and download buttons, plus heading colours for h1 and h2.
	_CUSTOM_CSS = """
	<style>
	.stButton>button {
	background-color: #4CAF50;
	color: white;
	font-weight: bold;
	border-radius: 5px;
	padding: 10px 20px;
	}
	.stDownloadButton>button {
	background-color: #008CBA;
	color: white;
	font-weight: bold;
	border-radius: 5px;
	padding: 10px 20px;
	}
	.stMarkdown h1 {
	color: #4CAF50;
	}
	.stMarkdown h2 {
	color: #008CBA;
	}
	</style>
	"""
	# unsafe_allow_html is needed so the <style> tag is rendered rather than escaped
	st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

	# Short description shown directly under the page heading
	_INTRO_TEXT = """
	This app uses the Isolation Forest algorithm to detect anomalies in your dataset.
	Upload a CSV file, and the app will identify anomalies in the data.
	"""

	# Page heading followed by the description
	st.title("🔍 Anomaly Detection App")
	st.write(_INTRO_TEXT)

	# File picker limited to .csv uploads; the rest of the script checks the
	# result against None before using it.
	uploaded_file = st.file_uploader(label="Upload your dataset (CSV file)", type=["csv"])

	# Main flow: once a CSV has been uploaded, preview it, let the user pick
	# features and a contamination rate, fit an Isolation Forest, then show,
	# plot and offer the labelled rows for download.
	if uploaded_file is not None:
	    # Load the dataset; a malformed or empty upload is reported in the UI
	    # instead of surfacing as a raw traceback.
	    try:
	        df = pd.read_csv(uploaded_file)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	        st.error(f"Could not read the uploaded file as CSV: {exc}")
	        st.stop()

	    # Show dataset preview
	    st.write("### Dataset Preview")
	    st.write(df.head())

	    # Select features for anomaly detection. Only numeric/boolean columns are
	    # offered: text columns cannot be standardised and would make the
	    # scaler raise.
	    st.write("### Select Features")
	    candidate_columns = df.select_dtypes(include=["number", "bool"]).columns.tolist()
	    if not candidate_columns:
	        st.warning("The uploaded dataset has no numeric columns to analyse.")
	    features = st.multiselect("Choose the features to use for anomaly detection", candidate_columns)

	    if features:
	        # Allow user to adjust contamination parameter
	        st.write("### Adjust Model Parameters")
	        contamination = st.slider("Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01)

	        # Rows with missing or infinite values in the chosen features cannot
	        # be scaled or scored, so they are excluded (with a visible notice)
	        # rather than letting the fit fail.
	        usable_rows = np.isfinite(df[features].to_numpy(dtype=float)).all(axis=1)
	        skipped = int((~usable_rows).sum())
	        if skipped:
	            st.warning(
	                f"Ignoring {skipped} row(s) with missing or infinite values in the selected features."
	            )
	            df = df.loc[usable_rows].copy()
	        if df.empty:
	            st.error("No usable rows remain for the selected features.")
	            st.stop()

	        # Preprocess the data: standardise each selected feature
	        scaler = StandardScaler()
	        df_scaled = scaler.fit_transform(df[features])

	        # Train the Isolation Forest model (fixed seed for repeatable results)
	        with st.spinner("Training the model and detecting anomalies..."):
	            model = IsolationForest(n_estimators=100, contamination=contamination, random_state=42)
	            model.fit(df_scaled)

	            # Predict anomalies
	            predictions = model.predict(df_scaled)
	            df['anomaly'] = predictions  # -1 for anomaly, 1 for normal

	        # Display results
	        st.write("### Anomaly Detection Results")
	        st.write(df)

	        # Filter and display only anomalies
	        anomalies = df[df['anomaly'] == -1]
	        st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
	        st.write(anomalies)

	        # Visualize anomalies on the first two selected features
	        st.write("### Visualize Anomalies")
	        if len(features) >= 2:
	            fig, ax = plt.subplots()
	            # Draw on the axes created above instead of relying on pyplot's
	            # implicit "current axes".
	            sns.scatterplot(
	                data=df,
	                x=features[0],
	                y=features[1],
	                hue='anomaly',
	                palette={1: 'blue', -1: 'red'},
	                ax=ax,
	            )
	            st.pyplot(fig)
	            # pyplot keeps every figure alive until it is closed; the script
	            # re-runs on each interaction, so close it to avoid a build-up.
	            plt.close(fig)
	        else:
	            st.warning("Please select at least 2 features to visualize anomalies.")

	        # Download results as CSV
	        st.write("### Download Results")
	        st.download_button(
	            label="Download Results as CSV",
	            data=df.to_csv(index=False).encode('utf-8'),
	            file_name='anomaly_detection_results.csv',
	            mime='text/csv',
	        )
	else:
	    st.write("Please upload a CSV file to get started.")