File size: 1,492 Bytes
a568baa fe46eed a568baa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from io import BytesIO
# Streamlit UI: upload an Excel workbook, flag anomalous rows with an
# Isolation Forest, and chart every numeric column with the anomalies marked.
st.title("Saif Check Anomalies")
st.write("Upload an Excel file to detect anomalies")

uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx", "xls"])

if uploaded_file is not None:
    # Process the file
    df = pd.read_excel(uploaded_file)

    # Keep only the int/float columns; everything else (text, dates, ...) is
    # discarded because the scaler and the model need numeric input.
    df = df.select_dtypes(include=[int, float])

    # Drop columns that hold no values at all, then rows with any gap, so
    # that no NaN is ever handed to the model. Columns go first: a single
    # empty column would otherwise wipe out every row.
    total_rows = len(df)
    df = df.dropna(axis="columns", how="all").dropna(axis="index", how="any")
    skipped_rows = total_rows - len(df)
    if skipped_rows:
        st.info(f"Skipped {skipped_rows} row(s) with missing numeric values")

    if df.empty:
        # No rows or no numeric columns left: fitting would raise, so tell
        # the user instead of showing a traceback.
        st.warning("No numeric data found in the uploaded file")
    else:
        # Scale the features
        scaler = StandardScaler()
        scaled_data = scaler.fit_transform(df)

        # Fit Isolation Forest model
        clf = IsolationForest(contamination=0.15, random_state=42)
        clf.fit(scaled_data)
        predictions = clf.predict(scaled_data)

        # Identify anomalies: the rows whose prediction equals -1.
        anomaly_indices = np.where(predictions == -1)[0]
        anomalies = df.iloc[anomaly_indices]

        # Display the number of anomalies
        num_anomalies = len(anomalies)
        st.subheader(f"Number of anomalies detected: {num_anomalies}")

        # Display anomalies
        st.subheader("Anomalies Detected")
        st.write(anomalies)

        # Generate and display graphs
        # anomaly_indices are positions; translate them to index labels so
        # the markers share the x-axis of the line drawn from df.index.
        anomaly_labels = df.index[anomaly_indices]
        for col in df.columns:
            fig, ax = plt.subplots()
            ax.plot(df.index, df[col], label="Data")
            ax.scatter(anomaly_labels, anomalies[col], color='red', label="Anomalies")
            ax.set_title(f"Anomalies in {col}")
            ax.legend()
            st.pyplot(fig)
            # Release the figure; pyplot keeps it registered until closed and
            # the script body runs again on every Streamlit rerun.
            plt.close(fig)