jerr3y committed on
Commit
a568baa
·
verified ·
1 Parent(s): 51ba011

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +50 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.ensemble import IsolationForest
5
+ from sklearn.preprocessing import StandardScaler
6
+ import matplotlib.pyplot as plt
7
+ from io import BytesIO
8
+
9
# Streamlit app: upload an Excel workbook, fit an Isolation Forest on its
# numeric columns, and show the rows flagged as anomalies plus one chart
# per column. Relies on the file-level imports (st, pd, np, plt,
# IsolationForest, StandardScaler).


def _usable_numeric_rows(frame):
    """Return the numeric part of *frame* that can be fed to the model.

    Non-numeric columns are discarded, then columns that are entirely
    empty, then any row that still has a missing value. The original index
    labels are kept so results can be traced back to the uploaded rows.
    """
    # "number" selects every numeric dtype, not only the default int/float
    # widths, so narrower or unsigned numeric columns are not dropped.
    numeric = frame.select_dtypes(include="number")
    # Drop all-empty columns first; otherwise the row-wise dropna() below
    # would discard every row because of a single blank column.
    numeric = numeric.dropna(axis="columns", how="all")
    return numeric.dropna(axis="index", how="any")


def _anomaly_positions(frame):
    """Return positional indices of rows the Isolation Forest labels -1.

    Features are standardised first; the forest is fitted and evaluated on
    the same data with a fixed seed so repeated runs on the same workbook
    give the same result.
    """
    scaled_data = StandardScaler().fit_transform(frame)
    clf = IsolationForest(contamination=0.15, random_state=42)
    clf.fit(scaled_data)
    predictions = clf.predict(scaled_data)
    return np.where(predictions == -1)[0]


st.title("Saif Check Anomalies")
st.write("Upload an Excel file to detect anomalies")

uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx", "xls"])

if uploaded_file:
    # Process the file
    raw_df = pd.read_excel(uploaded_file)

    # Remove string columns and rows that cannot be scored
    df = _usable_numeric_rows(raw_df)
    skipped_rows = len(raw_df) - len(df)

    if df.empty:
        # Nothing to fit: either no numeric columns or no complete rows.
        # Report it instead of letting the scaler raise a traceback.
        st.error(
            "No usable numeric data found: the file needs at least one "
            "numeric column and one row without missing values."
        )
    else:
        if skipped_rows:
            st.warning(
                f"Skipped {skipped_rows} row(s) with missing numeric values."
            )

        # Identify anomalies
        anomaly_indices = _anomaly_positions(df)
        anomalies = df.iloc[anomaly_indices]

        # Display the number of anomalies
        num_anomalies = len(anomalies)
        st.subheader(f"Number of anomalies detected: {num_anomalies}")

        # Display anomalies
        st.subheader("Anomalies Detected")
        st.write(anomalies)

        # Generate and display graphs
        for col in df.columns:
            fig, ax = plt.subplots()
            ax.plot(df.index, df[col], label="Data")
            # Use the anomalies' own index labels for x so the markers sit
            # on the line even when the index is not 0..n-1 (for example
            # after rows with missing values were skipped).
            ax.scatter(
                anomalies.index,
                anomalies[col],
                color="red",
                label="Anomalies",
            )
            ax.set_title(f"Anomalies in {col}")
            ax.legend()
            st.pyplot(fig)
            # pyplot keeps a reference to every figure it creates; close it
            # so repeated reruns do not accumulate figures in memory.
            plt.close(fig)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ matplotlib
6
+ openpyxl
7
+ xlrd