Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pickle | |
| import pandas as pd | |
| import numpy as np | |
| import banpei | |
| import os | |
| import plotly.graph_objs as go | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import random | |
# Deserialize the pre-trained regressor once, when the module is imported.
# The path is relative to the current working directory.
with open('ANOMALY_random_forest_regressor.pickle', 'rb') as model_file:
    loaded_regressor = pickle.load(model_file)
| # Function to predict TV delta | |
def predict_tv_delta(temperature, holiday, prev_value, twice_prev_value, day_shift, month_shift):
    """Predict one TV delta value from manually supplied feature values.

    The six arguments are packed into a one-row DataFrame with the columns
    'Temperature', 'holiday', 'prev value', 'twice prev value', 'day shift'
    and 'month shift' (in that order) and handed to the module-level
    ``loaded_regressor``.

    Returns:
        The first (and only) element of the regressor's prediction.
    """
    feature_values = {
        'Temperature': temperature,
        'holiday': holiday,
        'prev value': prev_value,
        'twice prev value': twice_prev_value,
        'day shift': day_shift,
        'month shift': month_shift,
    }
    single_row = pd.DataFrame({name: [value] for name, value in feature_values.items()})
    ordered_columns = single_row.columns.tolist()
    prediction = loaded_regressor.predict(single_row[ordered_columns])
    return prediction[0]
| # Function to detect and classify anomalies | |
def detect_classify_anomalies(df, window):
    """Score prediction errors against rolling bands and flag anomalies.

    Args:
        df: DataFrame with 'load_date', 'actuals' and 'predicted' columns.
            It is modified in place: inf/NaN values are replaced by 0 and
            the derived columns listed below are added.
        window: Window passed to ``Series.rolling`` (an integer row count or
            an offset string such as '14D'; an offset requires a monotonic
            datetime-like index).

    Returns:
        A copy of ``df`` sorted by 'load_date' in descending order, with
        'load_date' converted to datetime and these extra columns:

        * 'error': actuals - predicted
        * 'percentage_change': error / actuals * 100
        * 'meanval', 'deviation': rolling mean / std of 'error'
        * '-3s'/'3s', '-2s'/'2s', '-1s'/'1s': meanval -/+ 2, 1.75 and 1.5
          rolling standard deviations respectively
        * 'impact': 0-7, how many of the seven thresholds
          (-3s, -2s, -1s, meanval, 1s, 2s, 3s) lie strictly below the error
        * 'color': severity 0-3 derived from 'impact'
        * 'region': "NEGATIVE" for impact 0-3, "POSITIVE" for impact 4-7
        * 'anomaly_points': the error where severity is 3, NaN elsewhere
    """
    # np.NaN was removed in NumPy 2.0; np.nan exists on every version.
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)

    df['error'] = df['actuals'] - df['predicted']
    df['percentage_change'] = (df['error'] / df['actuals']) * 100

    rolling_error = df['error'].rolling(window=window)
    df['meanval'] = rolling_error.mean()
    df['deviation'] = rolling_error.std()

    # Column names say 3s/2s/1s but the multipliers are 2, 1.75 and 1.5.
    for label, multiplier in (('3s', 2), ('2s', 1.75), ('1s', 1.5)):
        spread = multiplier * df['deviation']
        df['-' + label] = df['meanval'] - spread
        df[label] = df['meanval'] + spread

    # The thresholds are already in ascending order within a row (the
    # deviation is never negative), so the position the error would take in
    # the sorted row equals the number of thresholds strictly below it.
    # Counting per row avoids the earlier whole-array search, which could
    # pick up the position of an equal value belonging to a different row.
    # NaN thresholds compare as "not below", matching np.sort placing NaN
    # last.
    # NOTE(review): rows whose bands are NaN (rolling warm-up) or whose
    # deviation is 0 therefore get impact 0 / severity 3 - confirm intended.
    thresholds = df[['-3s', '-2s', '-1s', 'meanval', '1s', '2s', '3s']].to_numpy()
    errors = df['error'].to_numpy()
    df['impact'] = (thresholds < errors[:, np.newaxis]).sum(axis=1)

    severity = {0: 3, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 2, 7: 3}
    region = {0: "NEGATIVE", 1: "NEGATIVE", 2: "NEGATIVE", 3: "NEGATIVE",
              4: "POSITIVE", 5: "POSITIVE", 6: "POSITIVE", 7: "POSITIVE"}
    df['color'] = df['impact'].map(severity)
    df['region'] = df['impact'].map(region)
    df['anomaly_points'] = np.where(df['color'] == 3, df['error'], np.nan)

    df = df.sort_values(by='load_date', ascending=False)
    df['load_date'] = pd.to_datetime(df['load_date'], format="%Y-%m-%d %H:%M:%S")
    return df
| # Function to plot anomaly data | |
def plot_anomaly(df, metric_name):
    """Render the anomaly dashboard as a single Plotly figure in Streamlit.

    The figure stacks three areas: an error chart (error, rolling mean,
    outer bands and anomaly markers) on axes x1/y1, an actuals-vs-predicted
    chart with anomaly markers on axes x2/y2, and a table of the rows.

    Args:
        df: DataFrame providing the columns 'load_date', 'actuals',
            'predicted', 'percentage_change', 'color', 'error', 'meanval',
            '3s', '-3s' and 'anomaly_points'.
        metric_name: Used as the figure title.

    Returns:
        None. The figure is sent to ``st.plotly_chart``.
    """
    dates = df.load_date

    # Anomaly markers for the actuals chart: keep the actual value where an
    # anomaly is flagged and NaN elsewhere. Series.where is used rather than
    # multiplying by the mask and blanking zeros, because that approach also
    # blanked flagged rows whose actual value is exactly 0.
    is_anomaly = abs(df['anomaly_points']) > 0
    anomaly_points = df['actuals'].where(is_anomaly)

    # Table row background per severity level (the 'color' column).
    color_map = {0: 'rgb(228, 222, 249)', 1: "yellow", 2: "orange", 3: "red"}
    table_columns = ['load_date', 'actuals', 'predicted', 'percentage_change', 'color']
    rounded = df.round(3)

    table = go.Table(
        domain=dict(x=[0, 1], y=[0, 0.3]),
        columnwidth=[1, 2],
        header=dict(height=20,
                    values=[['<b>Date</b>'], ['<b>Actual Values </b>'],
                            ['<b>Predicted</b>'], ['<b>% Difference</b>'],
                            ['<b>Severity (0-3)</b>']],
                    font=dict(color=['rgb(45, 45, 45)'] * 5, size=14),
                    fill=dict(color='#d562be')),
        cells=dict(values=[rounded[k].tolist() for k in table_columns],
                   line=dict(color='#506784'),
                   align=['center'] * 5,
                   font=dict(color=['rgb(40, 40, 40)'] * 5, size=12),
                   suffix=[None, '', '', '%', ''],
                   height=27,
                   fill=dict(color=[df['color'].map(color_map)])))

    anomalies = go.Scatter(name="Anomaly",
                           x=dates,
                           xaxis='x1',
                           yaxis='y1',
                           y=df['anomaly_points'],
                           mode='markers',
                           marker=dict(color='red', size=11,
                                       line=dict(color="red", width=2)))

    upper_bound = go.Scatter(hoverinfo="skip",
                             x=dates,
                             showlegend=False,
                             xaxis='x1',
                             yaxis='y1',
                             y=df['3s'],
                             marker=dict(color="#444"),
                             line=dict(color=('rgb(23, 96, 167)'),
                                       width=2,
                                       dash='dash'),
                             fillcolor='rgb(68, 68, 68)',
                             fill='tonexty')

    lower_bound = go.Scatter(name='Confidence',
                             x=dates,
                             xaxis='x1',
                             yaxis='y1',
                             y=df['-3s'],
                             marker=dict(color="#444"),
                             line=dict(color=('rgb(23, 96, 167)'),
                                       width=2,
                                       dash='dash'),
                             fillcolor='rgb(68, 68, 68)',
                             fill='tonexty')

    actuals_trace = go.Scatter(name='Actuals',
                               x=dates,
                               y=df['actuals'],
                               xaxis='x2', yaxis='y2',
                               mode='lines',
                               marker=dict(size=12,
                                           line=dict(width=1),
                                           color="blue"))

    predicted_trace = go.Scatter(name='Predicted',
                                 x=dates,
                                 y=df['predicted'],
                                 xaxis='x2', yaxis='y2',
                                 mode='lines',
                                 marker=dict(size=12,
                                             line=dict(width=1),
                                             color="orange"))

    error_trace = go.Scatter(name="Error",
                             x=dates, y=df['error'],
                             xaxis='x1',
                             yaxis='y1',
                             mode='lines',
                             marker=dict(size=12,
                                         line=dict(width=1),
                                         color="red"),
                             text="Error")

    anomalies_map = go.Scatter(name="anomaly actual",
                               showlegend=False,
                               x=dates,
                               y=anomaly_points,
                               mode='markers',
                               xaxis='x2',
                               yaxis='y2',
                               marker=dict(color="red", size=11,
                                           line=dict(color="red", width=2)))

    moving_average_trace = go.Scatter(name="Moving A",
                                      x=dates,
                                      y=df['meanval'],
                                      mode='lines',
                                      xaxis='x1',
                                      yaxis='y1',
                                      marker=dict(size=12,
                                                  line=dict(width=1),
                                                  color="green"),
                                      text="Moving A")

    # Settings shared by all four axes.
    axis = dict(
        showline=True,
        zeroline=False,
        showgrid=True,
        mirror=True,
        ticklen=4,
        gridcolor='#ffffff',
        tickfont=dict(size=10))

    # Vertical layout: table in [0, 0.3], actuals chart (y2) above it and
    # the error chart (y1) at the top.
    layout = dict(
        width=1000,
        height=865,
        autosize=False,
        title=metric_name,
        margin=dict(t=75),
        showlegend=True,
        xaxis1=dict(axis, **dict(domain=[0, 1], anchor='y1', showticklabels=True)),
        xaxis2=dict(axis, **dict(domain=[0, 1], anchor='y2', showticklabels=True)),
        yaxis1=dict(axis, **dict(domain=[2 * 0.21 + 0.20 + 0.09, 1], anchor='x1', hoverformat='.2f')),
        yaxis2=dict(axis, **dict(domain=[0.21 + 0.12, 2 * 0.31 + 0.02], anchor='x2', hoverformat='.2f')))

    fig = go.Figure(data=[table, anomalies, anomalies_map,
                          upper_bound, lower_bound, actuals_trace, predicted_trace,
                          moving_average_trace, error_trace], layout=layout)
    st.plotly_chart(fig)
| # Function to plot temperature anomalies | |
def plot_temp(anomalies, results, test, title):
    """Plot the 'tv delta' series with a red marker line at each anomaly.

    Args:
        anomalies: One entry per sample; a non-zero entry is the x position
            at which to draw a marker, 0 means "no anomaly" (the format
            produced by ``to_xcoords``).
        results: Unused; kept so existing callers keep working.
        test: DataFrame with a 'tv delta' column; the column is looked up
            by integer label, so a default integer index is presumably
            expected - verify against caller.
        title: Title of the plot.

    Returns:
        None. The figure is rendered with ``st.pyplot`` and then closed.
    """
    fig, ax = plt.subplots(figsize=(20, 10))
    sns.set_style('darkgrid')

    tv_delta = test['tv delta']
    sns.lineplot(data=tv_delta[3616:6420], color='green', ax=ax)

    # Only build marker lines for flagged samples. Unflagged entries sit at
    # x == 0, which is outside the x-limits set below, so skipping them does
    # not change what is visible.
    flagged = [(x, tv_delta[i]) for i, x in enumerate(anomalies) if x]
    xs = [x for x, _ in flagged]
    ymins = [int(value - 25) for _, value in flagged]
    ymaxs = [int(value + 25) for _, value in flagged]
    ax.vlines(x=xs, ymin=ymins, ymax=ymaxs, colors='red', ls='-', lw=1)

    ax.vlines(x=3895, ymin=0, ymax=350, colors='grey', ls='--', lw=1)
    ax.text(x=3895, y=325, s=' Start of Summer', alpha=1, color='black')
    ax.vlines(x=6141, ymin=0, ymax=350, colors='grey', ls='--', lw=1)
    ax.text(x=6141, y=325, s=' End of Summer', alpha=1, color='black')
    ax.set_xlim(3616, 6420)
    ax.set_ylim(0, 350)
    ax.set_title(title, fontsize=20)

    st.pyplot(fig)
    # Release the figure; otherwise one more figure stays registered with
    # pyplot every time the script reruns.
    plt.close(fig)
| # Function to plot changepoint probabilities | |
def plot_changepoint_probabilities(results, cumcutoff, title):
    """Plot change-point scores and their rolling cutoff on a log y-axis.

    Args:
        results: Sequence of change-point scores, one per sample.
        cumcutoff: Sequence of cutoff values, one per sample.
        title: Title of the plot.

    Returns:
        None. The figure is rendered with ``st.pyplot`` and then closed.
    """
    # Kept from the original: applies the seaborn theme and default size.
    sns.set(rc={'figure.figsize': (20, 5)})

    # Draw on a dedicated figure instead of pyplot's implicit "current"
    # figure, so the lines can never land on a figure left open by an
    # earlier plot, and pass that figure to st.pyplot explicitly.
    fig, ax = plt.subplots(figsize=(20, 5))
    sns.lineplot(data=results, ax=ax)
    sns.lineplot(data=np.asarray(cumcutoff), ax=ax)
    ax.set_xlim(3616, 6420)
    ax.set_ylim(.00005, 1)
    ax.set_yscale('log')
    ax.set_title(title, fontsize=20)

    st.pyplot(fig)
    plt.close(fig)
# Function to flag outliers using a rolling mean +/- 2 standard deviation band
def detect_anomalies(results, window_size):
    """Flag values that fall outside a rolling mean +/- 2 std band.

    The rolling window starts as ``window_size`` zeros. For each value, the
    value is appended to the window, the band is computed over the
    ``window_size + 1`` entries, and the oldest entry is then dropped.

    Args:
        results: Iterable of numeric scores.
        window_size: Number of previous entries kept in the window.

    Returns:
        A tuple ``(anomalies, cumcutoff)``: ``anomalies`` is the per-sample
        0/1 outlier flags passed through ``to_xcoords``; ``cumcutoff`` is
        the upper band (mean + 2 * std) at each sample.
    """
    # A single window suffices: the mean and the std were always taken over
    # the same values, and each is now computed once per sample.
    window = [0] * window_size
    outlier_flags = []
    cumcutoff = []
    for value in results:
        window.append(value)
        centre = np.mean(window)
        spread = 2 * np.std(window)
        upper = centre + spread
        cumcutoff.append(upper)
        outlier_flags.append(0 if centre - spread <= value <= upper else 1)
        window.pop(0)
    anomalies = to_xcoords(outlier_flags)
    return anomalies, cumcutoff
| # Define a function to convert outlier detection to x-coordinates | |
def to_xcoords(outlierbin):
    """Multiply each entry by its 1-based position in the sequence.

    For 0/1 outlier flags this yields the 1-based position of every flagged
    entry and 0 for every unflagged one.
    """
    return [position * flag for position, flag in enumerate(outlierbin, start=1)]
# Main function: page selection and rendering
def _render_prediction_page():
    """Render the input widgets and the prediction for 'TV Delta Prediction'."""
    st.title('TV Delta Prediction')
    st.write('Enter the values below to predict TV delta.')
    temperature = st.slider('Temperature', min_value=-20.0, max_value=40.0, value=5.0)
    holiday = st.selectbox('Holiday', options=['No', 'Yes'], index=1)
    prev_value = st.number_input('Previous Value', value=29.0)
    twice_prev_value = st.number_input('Twice Previous Value', value=41.0)
    day_shift = st.slider('Day Shift', min_value=-20.0, max_value=20.0, value=19.0)
    month_shift = st.slider('Month Shift', min_value=-200.0, max_value=200.0, value=123.0)
    # The regressor receives the holiday flag as 1/0.
    holiday_binary = 1 if holiday == 'Yes' else 0
    if st.button('Predict'):
        result = predict_tv_delta(temperature, holiday_binary, prev_value,
                                  twice_prev_value, day_shift, month_shift)
        st.write('Predicted TV delta:', result)


def _render_anomaly_analysis_page():
    """Render the 'Anomaly Detection and Analysis' page from uploaded CSVs."""
    st.title("Anomaly Detection and Analysis")
    st.sidebar.title("Settings")
    train_file = st.sidebar.file_uploader("Upload Training Data CSV", type=['csv'])
    test_file = st.sidebar.file_uploader("Upload Testing Data CSV", type=['csv'])
    train_predict_file = st.sidebar.file_uploader("Upload Training Predictions CSV", type=['csv'])
    test_predict_file = st.sidebar.file_uploader("Upload Testing Predictions CSV", type=['csv'])

    # All four uploads are still required before anything renders, but only
    # the testing predictions are used below, so only that file is parsed.
    if not (train_file and test_file and train_predict_file and test_predict_file):
        return

    test_predict = pd.read_csv(test_predict_file)
    test_predict['Timestamp'] = pd.to_datetime(test_predict['Timestamp'])
    # The '14D' rolling window below needs a monotonic datetime index.
    test_predict = test_predict.sort_values('Timestamp')

    predicted_df = test_predict[['Timestamp', 'value delta', 'pred']].copy()
    predicted_df.columns = ['load_date', 'actuals', 'predicted']
    # Keep 'load_date' as a column and index by a copy of it.
    predicted_df['load_date2'] = predicted_df['load_date']
    predicted_df = predicted_df.set_index('load_date2')

    classify_df = detect_classify_anomalies(predicted_df, '14D')
    st.markdown("---")
    plot_anomaly(classify_df, "Metric Name")
    st.markdown("---")
    st.subheader("Cluster Counts")
    # Severity 3 rows are reported as outliers.
    classify_df['cluster'] = np.where(classify_df['color'] == 3, 'Outlier', 'Non-Outlier')
    cluster_counts = classify_df['cluster'].value_counts()
    st.write(cluster_counts)


def _render_temperature_page():
    """Render the 'Temperature Anomaly Detection' page from test_predictions.csv."""
    st.title("Temperature Anomaly Detection")
    test = pd.read_csv('test_predictions.csv')
    test.drop('Timestamp', axis=1, inplace=True)
    model = banpei.SST(w=50)
    results = model.detect(test['tv delta'], is_lanczos=True)

    # The same scores are thresholded with three rolling-window sizes.
    sensitivity_windows = (
        ("Base", 50),
        ("High Sensitivity", 24),
        ("Less Sensitivity", 31 * 24),
    )
    for label, window_size in sensitivity_windows:
        anomalies, cumcutoff = detect_anomalies(results, window_size)
        plot_temp(anomalies, results, test, f"Anomalies - {label} Model")
        plot_changepoint_probabilities(results, cumcutoff,
                                       f"{label} Model Changepoint Probabilities")


def main():
    """Configure the Streamlit page and render every page picked in the sidebar."""
    st.set_page_config(layout="wide")
    st.title("Combined Streamlit App")
    # Page selection: several pages may be selected at once; they render in
    # the fixed order below.
    selected_page = st.sidebar.multiselect(
        "Select Page",
        ["TV Delta Prediction", "Anomaly Detection and Analysis", "Temperature Anomaly Detection"])
    if "TV Delta Prediction" in selected_page:
        _render_prediction_page()
    if "Anomaly Detection and Analysis" in selected_page:
        _render_anomaly_analysis_page()
    if "Temperature Anomaly Detection" in selected_page:
        _render_temperature_page()


if __name__ == "__main__":
    main()