File size: 1,492 Bytes
a568baa
 
 
 
 
 
 
fe46eed
a568baa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from io import BytesIO


# Streamlit UI: upload an Excel workbook, flag anomalous rows with an
# Isolation Forest, and plot every numeric column with the anomalies marked.
st.title("Saif Check Anomalies")
st.write("Upload an Excel file to detect anomalies")

uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx","xls"])

if uploaded_file:
    # Process the file
    df = pd.read_excel(uploaded_file)

    # Keep only numeric columns. "number" selects every numeric dtype rather
    # than just the ones the Python int/float types happen to map to.
    df = df.select_dtypes(include="number")

    # IsolationForest refuses NaN/inf input, so treat infinities as missing,
    # discard columns that contain no data at all, then drop incomplete rows.
    rows_before = len(df)
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(axis="columns", how="all").dropna(axis="index", how="any")
    rows_dropped = rows_before - len(df)

    if df.empty:
        # Nothing left to model (no numeric columns or no complete rows);
        # report it instead of letting the scaler raise.
        st.error("The uploaded file contains no complete numeric rows to analyze.")
    else:
        if rows_dropped:
            st.warning(f"Ignored {rows_dropped} row(s) with missing or infinite values.")

        # Scale the features
        scaler = StandardScaler()
        scaled_data = scaler.fit_transform(df)

        # Fit Isolation Forest model (fixed seed keeps results reproducible)
        clf = IsolationForest(contamination=0.15, random_state=42)
        clf.fit(scaled_data)
        predictions = clf.predict(scaled_data)

        # Identify anomalies: the model labels outliers as -1
        anomaly_indices = np.where(predictions == -1)[0]
        anomalies = df.iloc[anomaly_indices]

        # Display the number of anomalies
        num_anomalies = len(anomalies)
        st.subheader(f"Number of anomalies detected: {num_anomalies}")

        # Display anomalies
        st.subheader("Anomalies Detected")
        st.write(anomalies)

        # Generate and display graphs
        for col in df.columns:
            fig, ax = plt.subplots()
            ax.plot(df.index, df[col], label="Data")
            # Use index labels (not positions) for x so the markers line up
            # with the plotted series even when rows were dropped above.
            ax.scatter(anomalies.index, anomalies[col], color='red', label="Anomalies")
            ax.set_title(f"Anomalies in {col}")
            ax.legend()
            st.pyplot(fig)
            # Release the figure; otherwise pyplot keeps every figure alive
            # across Streamlit reruns and memory grows without bound.
            plt.close(fig)