Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import numpy as np | |
| import joblib | |
| from sklearn.preprocessing import StandardScaler | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
# Reference (non-anomaly) dataset, read once from a CSV next to the app.
non_anomaly_csv_filename = 'non_anomaly_data.csv'
non_anomaly_df = pd.read_csv(non_anomaly_csv_filename)

# Serialized artifacts: the Isolation Forest model first, then the
# StandardScaler, both deserialized with joblib.
model_filename = "IsolationForest.joblib"
scaler_filename = "StandardScaler.joblib"
isolation_forest, scaler = (
    joblib.load(artifact_path)
    for artifact_path in (model_filename, scaler_filename)
)
# Page title, followed by the sidebar widgets that collect one set of
# feature values and a submit button.
st.title("Anomaly Detection App with Isolation Forest")

sidebar = st.sidebar
sidebar.title("Input Feature Values")

# Sliders whose bounds are given as floats.
transaction_dollar_amount = sidebar.slider(
    "Transaction Dollar Amount", min_value=0.0, max_value=10000.0
)
longitude = sidebar.slider(
    "Longitude (Long)", min_value=-180.0, max_value=180.0
)
latitude = sidebar.slider(
    "Latitude (Lat)", min_value=-90.0, max_value=90.0
)

# Sliders whose bounds are given as integers.
credit_card_limit = sidebar.slider(
    "Credit Card Limit", min_value=0, max_value=50000
)
year = sidebar.slider("Year", min_value=2000, max_value=2030)
month = sidebar.slider("Month", min_value=1, max_value=12)
day = sidebar.slider("Day", min_value=1, max_value=31)

submitted = sidebar.button("Submit")
# Score the submitted values, classify them, and render the result: a score
# histogram, a longitude/latitude scatter plot, and short text explanations.
if submitted:
    # Single-row frame built from the sidebar values; these keys become the
    # column names handed to the scaler.
    input_data = {
        'transaction_dollar_amount': transaction_dollar_amount,
        'Long': longitude,
        'Lat': latitude,
        'credit_card_limit': credit_card_limit,
        'year': year,
        'month': month,
        'day': day,
    }
    selected_columns = pd.DataFrame([input_data])

    # Standardize the input data using the loaded StandardScaler.
    selected_columns_scaled = scaler.transform(selected_columns)

    # Score the reference (non-anomaly) dataset and the single input row
    # with the same scaler + Isolation Forest pipeline.
    non_anomaly_scores = isolation_forest.decision_function(
        scaler.transform(non_anomaly_df)
    )
    your_anomaly_score = isolation_forest.decision_function(
        selected_columns_scaled
    )[0]

    # Accepted score range: the span of the reference scores widened by a
    # fixed margin on both sides.
    # NOTE(review): scikit-learn documents decision_function as "the lower,
    # the more abnormal" (negative = outlier). Flagging scores ABOVE the
    # reference maximum, and using a margin as large as 0.5, may make this
    # check far more permissive than intended -- confirm the desired rule.
    min_non_anomaly_score = np.min(non_anomaly_scores)
    max_non_anomaly_score = np.max(non_anomaly_scores)
    margin = 0.5
    min_threshold = min_non_anomaly_score - margin
    max_threshold = max_non_anomaly_score + margin

    # Single source of truth for the verdict; every section below reuses it
    # instead of re-deriving the condition.
    is_anomaly = (
        your_anomaly_score < min_threshold
        or your_anomaly_score > max_threshold
    )

    # Print the anomaly status.
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")

    # Histogram (with KDE) of the reference scores, with the input's score
    # drawn as a dashed vertical line. An explicit Figure/Axes is used rather
    # than pyplot's implicit global figure, so the object passed to
    # st.pyplot is unambiguous.
    score_fig, score_ax = plt.subplots(figsize=(8, 5))
    sns.histplot(
        non_anomaly_scores,
        kde=True,
        color='gray',
        label='Non-Anomaly Score Distribution',
        ax=score_ax,
    )
    score_ax.axvline(
        x=your_anomaly_score,
        color='blue',
        linestyle='dashed',
        label='Your Data Point',
    )
    score_ax.set_xlabel('Anomaly Score')
    score_ax.set_ylabel('Frequency')
    score_ax.set_title('Anomaly Score Distribution and Your Data Point')
    score_ax.legend()
    st.pyplot(score_fig)
    # Figures created through pyplot stay registered until closed; the script
    # reruns on every interaction, so release the figure once rendered.
    plt.close(score_fig)

    # Explain the results.
    st.write(
        "The input data point has been classified as an anomaly."
        if is_anomaly
        else "The input data point is not classified as an anomaly."
    )
    st.write("The anomaly score is:", your_anomaly_score)
    st.write(
        "The threshold for anomaly detection is:",
        min_threshold,
        "to",
        max_threshold,
    )

    # Scatter plot of longitude vs. latitude: reference points in light grey,
    # the submitted point on top with a verdict-dependent marker.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(
        data=non_anomaly_df,
        x='Long',
        y='Lat',
        color='lightgrey',
        label='Normal',
        ax=ax,
    )
    if is_anomaly:
        ax.scatter(
            selected_columns['Long'],
            selected_columns['Lat'],
            color='red',
            label='Suspicious 🚩',
            s=100,
            marker='x',
        )
    else:
        ax.scatter(
            selected_columns['Long'],
            selected_columns['Lat'],
            color='green',
            label='Valid ✅',
            s=100,
            marker='o',
        )
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("Location Plot: Anomaly Detection 🗺️")
    ax.legend()
    ax.grid(True)

    # Show the scatter plot in Streamlit, then release the figure.
    st.subheader("Location Plot: Anomaly Detection 🗺️")
    st.pyplot(fig)
    plt.close(fig)

    # The verdict is shown a second time, after the plots.
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")

    # Explain how to read the location plot.
    st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
    if is_anomaly:
        st.write("The input data point is marked as Suspicious 🚩 due to its anomaly score.")
        st.write("The red 'x' marker indicates a suspicious location.")
    else:
        st.write("The input data point is marked as Valid ✅ due to its anomaly score.")
        st.write("The green 'o' marker indicates a valid location.")