# Spaces: Sleeping
"""Streamlit app that flags anomalous rows of an uploaded CSV with Isolation Forest.

Flow: upload a CSV, pick numeric feature columns, choose the expected
proportion of anomalies, then inspect, plot and download the labelled rows.
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Column that receives the model's labels: -1 for anomaly, 1 for normal.
ANOMALY_COLUMN = "anomaly"

# Button and heading colours, injected into the page as raw CSS.
CUSTOM_CSS = """
<style>
.stButton>button {
background-color: #4CAF50;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stDownloadButton>button {
background-color: #008CBA;
color: white;
font-weight: bold;
border-radius: 5px;
padding: 10px 20px;
}
.stMarkdown h1 {
color: #4CAF50;
}
.stMarkdown h2 {
color: #008CBA;
}
</style>
"""


def load_csv(uploaded_file):
    """Parse the uploaded file as CSV.

    Returns the resulting DataFrame, or ``None`` after showing an error
    message when the file is empty, malformed, or not decodable as text.
    """
    try:
        return pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return None


def keep_finite_rows(data: pd.DataFrame, features: list) -> pd.DataFrame:
    """Return a copy of the rows whose selected features are all finite.

    Rows holding NaN or +/-inf in any selected feature are dropped because
    the Isolation Forest cannot be fitted on them.
    """
    values = data[features].replace([np.inf, -np.inf], np.nan)
    return data.loc[values.notna().all(axis=1)].copy()


def label_anomalies(data: pd.DataFrame, features: list, contamination: float) -> pd.DataFrame:
    """Return a copy of ``data`` with an added anomaly-label column.

    The selected features are standardised, an Isolation Forest is fitted on
    them, and each row is labelled -1 (anomaly) or 1 (normal).
    """
    scaled = StandardScaler().fit_transform(data[features])
    model = IsolationForest(n_estimators=100, contamination=contamination, random_state=42)
    model.fit(scaled)

    labelled = data.copy()
    labelled[ANOMALY_COLUMN] = model.predict(scaled)
    return labelled


def plot_anomalies(data: pd.DataFrame, x_feature: str, y_feature: str) -> None:
    """Render a scatter plot of two features, coloured by anomaly label."""
    fig, ax = plt.subplots()
    try:
        # Draw on the figure's own axes instead of relying on pyplot's
        # implicit "current axes" state.
        sns.scatterplot(
            data=data,
            x=x_feature,
            y=y_feature,
            hue=ANOMALY_COLUMN,
            palette={1: 'blue', -1: 'red'},
            ax=ax,
        )
        st.pyplot(fig)
    finally:
        # The script reruns on every widget interaction; close the figure so
        # figures do not pile up in memory.
        plt.close(fig)


# Set page title and icon (must be the first Streamlit call in the script).
# NOTE(review): the icon/title glyph looks like a mis-encoded emoji - confirm
# the intended character.
st.set_page_config(page_title="Anomaly Detection App", page_icon="π")

# Custom CSS for better styling
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# Title of the app
st.title("π Anomaly Detection App")
st.write("""
This app uses the **Isolation Forest** algorithm to detect anomalies in your dataset.
Upload a CSV file, and the app will identify anomalies in the data.
""")

# Upload dataset
uploaded_file = st.file_uploader("Upload your dataset (CSV file)", type=["csv"])
if uploaded_file is None:
    st.write("Please upload a CSV file to get started.")
    st.stop()

# Load the dataset
df = load_csv(uploaded_file)
if df is None:
    st.stop()
if df.empty:
    st.error("The uploaded CSV file contains no rows.")
    st.stop()

# Show dataset preview
st.write("### Dataset Preview")
st.write(df.head())

# Select features for anomaly detection; only numeric columns can be scaled
# and fed to the model, so non-numeric columns are not offered.
st.write("### Select Features")
numeric_columns = df.select_dtypes(include="number").columns.tolist()
if not numeric_columns:
    st.error("The dataset has no numeric columns to use for anomaly detection.")
    st.stop()
features = st.multiselect("Choose the features to use for anomaly detection", numeric_columns)
if not features:
    # Nothing to do until the user picks at least one feature.
    st.stop()

# Allow user to adjust contamination parameter
st.write("### Adjust Model Parameters")
contamination = st.slider("Contamination (proportion of anomalies)", 0.01, 0.5, 0.1, 0.01)

# Preprocess the data: rows with missing or infinite feature values are
# excluded from the analysis.
usable = keep_finite_rows(df, features)
skipped = len(df) - len(usable)
if usable.empty:
    st.error("Every row has a missing or infinite value in the selected features.")
    st.stop()
if skipped:
    st.warning(f"Skipped {skipped} row(s) with missing or infinite values in the selected features.")

# Train the Isolation Forest model and predict anomalies
with st.spinner("Training the model and detecting anomalies..."):
    results = label_anomalies(usable, features, contamination)

# Display results
st.write("### Anomaly Detection Results")
st.write(results)

# Filter and display only anomalies
anomalies = results[results[ANOMALY_COLUMN] == -1]
st.write(f"### Detected Anomalies (Total: {len(anomalies)})")
st.write(anomalies)

# Visualize anomalies (the first two selected features form the axes)
st.write("### Visualize Anomalies")
if len(features) >= 2:
    plot_anomalies(results, features[0], features[1])
else:
    st.warning("Please select at least 2 features to visualize anomalies.")

# Download results as CSV
st.write("### Download Results")
st.download_button(
    label="Download Results as CSV",
    data=results.to_csv(index=False).encode('utf-8'),
    file_name='anomaly_detection_results.csv',
    mime='text/csv',
)