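# NOTE: the three lines below are page-header residue captured together with
# the file (avatar caption, commit message, commit id); they are not code.
# Dhanushlevi's picture
# Rename comp.py to app.py
# 055c451 verified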
import streamlit as st
import pickle
import pandas as pd
import numpy as np
import banpei
import os
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import seaborn as sns
import random
# Load the pre-trained model that predict_tv_delta queries.
# NOTE(review): the file name suggests a random-forest regressor; the actual
# estimator is whatever object was pickled - confirm against the training code.
# pickle.load can execute arbitrary code while unpickling, so this file must
# only ever come from a trusted source.
with open('ANOMALY_random_forest_regressor.pickle', 'rb') as f:
    loaded_regressor = pickle.load(f)
# Function to predict TV delta
def predict_tv_delta(temperature, holiday, prev_value, twice_prev_value, day_shift, month_shift):
    """Predict the TV delta for one manually entered observation.

    The six inputs are packed into a single-row DataFrame, in the column
    order listed below, and handed to the module-level ``loaded_regressor``.

    Args:
        temperature: Value stored in the ``Temperature`` column.
        holiday: Value stored in the ``holiday`` column (the caller in this
            file passes 0 or 1).
        prev_value: Value stored in the ``prev value`` column.
        twice_prev_value: Value stored in the ``twice prev value`` column.
        day_shift: Value stored in the ``day shift`` column.
        month_shift: Value stored in the ``month shift`` column.

    Returns:
        The first element of the regressor's prediction for that row.
    """
    observation = {
        'Temperature': temperature,
        'holiday': holiday,
        'prev value': prev_value,
        'twice prev value': twice_prev_value,
        'day shift': day_shift,
        'month shift': month_shift,
    }
    features = pd.DataFrame({column: [value] for column, value in observation.items()})
    # Select the columns explicitly so the model sees them in the order above.
    ordered_features = features[list(features.columns)]
    prediction = loaded_regressor.predict(ordered_features)
    return prediction[0]
# Function to detect and classify anomalies
def detect_classify_anomalies(df, window):
    """Score prediction errors against rolling bands and classify anomalies.

    The error (``actuals - predicted``) of every row is ranked among the
    rolling mean of the error and three pairs of bands around it. The rank
    is mapped to a severity (0-3) and a region (NEGATIVE / POSITIVE).

    Args:
        df: Frame with ``load_date``, ``actuals`` and ``predicted`` columns.
            It is not modified; a new frame is returned.
        window: Passed straight to ``Series.rolling(window=...)``. The caller
            in this file passes the offset string ``'14D'``, which pandas
            only accepts when the frame has a datetime-like index.

    Returns:
        A new frame, sorted by ``load_date`` descending, with ``load_date``
        converted to datetime and these columns added: ``error``,
        ``percentage_change``, ``meanval``, ``deviation``, the band columns
        ``-3s``/``3s``/``-2s``/``2s``/``-1s``/``1s``, ``impact``, ``color``
        (severity), ``region`` and ``anomaly_points`` (the error where the
        severity is 3, NaN elsewhere).

    Note:
        Rows whose rolling statistics are still NaN (window not yet filled)
        have NaN bands. NaN sorts last, so the error ranks first and such
        rows are reported with severity 3, as in the previous implementation.
    """
    # ``replace`` returns a new frame, so the caller's data is left untouched.
    # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in NumPy 2.
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    df['error'] = df['actuals'] - df['predicted']
    # A zero actual would turn the ratio into +/-inf (or 0/0); report NaN instead.
    df['percentage_change'] = (df['error'] / df['actuals'].replace(0, np.nan)) * 100

    rolling_error = df['error'].rolling(window=window)
    df['meanval'] = rolling_error.mean()
    df['deviation'] = rolling_error.std()

    # Column names are kept for the plotting code; the actual multipliers are
    # 2, 1.75 and 1.5 standard deviations rather than 3, 2 and 1.
    for label, multiplier in (('3s', 2), ('2s', 1.75), ('1s', 1.5)):
        spread = multiplier * df['deviation']
        df['-' + label] = df['meanval'] - spread
        df[label] = df['meanval'] + spread

    band_columns = ['error', '-3s', '-2s', '-1s', 'meanval', '1s', '2s', '3s']
    sorted_bands = np.sort(df[band_columns].to_numpy(dtype=float), axis=1)
    errors = df['error'].to_numpy(dtype=float)
    # Rank of each error inside ITS OWN sorted row (first match). The previous
    # code searched the whole matrix, so a value that also occurred in an
    # earlier row picked up that row's rank, and the scan was quadratic.
    df['impact'] = (sorted_bands == errors[:, np.newaxis]).argmax(axis=1)

    severity = {0: 3, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 2, 7: 3}
    region = {0: "NEGATIVE", 1: "NEGATIVE", 2: "NEGATIVE", 3: "NEGATIVE",
              4: "POSITIVE", 5: "POSITIVE", 6: "POSITIVE", 7: "POSITIVE"}
    df['color'] = df['impact'].map(severity)
    df['region'] = df['impact'].map(region)
    df['anomaly_points'] = np.where(df['color'] == 3, df['error'], np.nan)

    df = df.sort_values(by='load_date', ascending=False)
    df['load_date'] = pd.to_datetime(df['load_date'], format="%Y-%m-%d %H:%M:%S")
    return df
# Function to plot anomaly data
def plot_anomaly(df, metric_name):
    """Draw the anomaly dashboard for a classified frame and show it in Streamlit.

    The Plotly figure combines a summary table, an error panel (axes x1/y1)
    holding the error, its rolling mean, the outer bands and the flagged
    errors, and an actual-versus-predicted panel (axes x2/y2) holding the
    flagged actuals.

    Args:
        df: Frame providing the columns read below: ``load_date``,
            ``actuals``, ``predicted``, ``percentage_change``, ``color``,
            ``error``, ``meanval``, ``3s``, ``-3s`` and ``anomaly_points``.
        metric_name: Text used as the figure title.
    """
    timeline = df.load_date

    # Actual values at the flagged rows; every other entry is blanked to NaN
    # so that only anomalies get a marker on the second panel.
    flagged = (abs(df['anomaly_points']) > 0)
    actual_values = df["actuals"][-len(flagged):]
    flagged_actuals = flagged * actual_values
    flagged_actuals[flagged_actuals == 0] = np.nan

    severity_colors = {0: 'rgb(228, 222, 249)', 1: "yellow", 2: "orange", 3: "red"}
    table_columns = ['load_date', 'actuals', 'predicted', 'percentage_change', 'color']
    rounded = df.round(3)

    def line_marker(colour):
        # Marker settings shared by the plain line traces.
        return dict(size=12, line=dict(width=1), color=colour)

    def band_style():
        # Dashed, filled styling shared by the upper and lower band traces.
        return dict(
            marker=dict(color="#444"),
            line=dict(color=('rgb(23, 96, 167)'), width=2, dash='dash'),
            fillcolor='rgb(68, 68, 68)',
            fill='tonexty',
        )

    def anomaly_marker():
        # Red dot used for flagged points on both panels.
        return dict(color='red', size=11, line=dict(color="red", width=2))

    header_spec = dict(
        height=20,
        values=[['<b>Date</b>'], ['<b>Actual Values </b>'],
                ['<b>Predicted</b>'], ['<b>% Difference</b>'], ['<b>Severity (0-3)</b>']],
        font=dict(color=['rgb(45, 45, 45)'] * 5, size=14),
        fill=dict(color='#d562be'),
    )
    cell_spec = dict(
        values=[rounded[column].tolist() for column in table_columns],
        line=dict(color='#506784'),
        align=['center'] * 5,
        font=dict(color=['rgb(40, 40, 40)'] * 5, size=12),
        suffix=[None, '', '', '%', ''],
        height=27,
        # Each row is tinted according to its severity.
        fill=dict(color=[df['color'].map(severity_colors)]),
    )
    summary_table = go.Table(
        domain=dict(x=[0, 1], y=[0, 0.3]),
        columnwidth=[1, 2],
        header=header_spec,
        cells=cell_spec,
    )

    # --- error panel (x1 / y1) -------------------------------------------
    error_anomalies = go.Scatter(
        name="Anomaly",
        x=timeline,
        xaxis='x1',
        yaxis='y1',
        y=df['anomaly_points'],
        mode='markers',
        marker=anomaly_marker(),
    )
    upper_band = go.Scatter(
        hoverinfo="skip",
        x=timeline,
        showlegend=False,
        xaxis='x1',
        yaxis='y1',
        y=df['3s'],
        **band_style()
    )
    lower_band = go.Scatter(
        name='Confidence',
        x=timeline,
        xaxis='x1',
        yaxis='y1',
        y=df['-3s'],
        **band_style()
    )
    error_line = go.Scatter(
        name="Error",
        x=timeline,
        y=df['error'],
        xaxis='x1',
        yaxis='y1',
        mode='lines',
        marker=line_marker("red"),
        text="Error",
    )
    moving_average = go.Scatter(
        name="Moving A",
        x=timeline,
        y=df['meanval'],
        mode='lines',
        xaxis='x1',
        yaxis='y1',
        marker=line_marker("green"),
        text="Moving A",
    )

    # --- actual vs. predicted panel (x2 / y2) ----------------------------
    actual_line = go.Scatter(
        name='Actuals',
        x=timeline,
        y=df['actuals'],
        xaxis='x2',
        yaxis='y2',
        mode='lines',
        marker=line_marker("blue"),
    )
    predicted_line = go.Scatter(
        name='Predicted',
        x=timeline,
        y=df['predicted'],
        xaxis='x2',
        yaxis='y2',
        mode='lines',
        marker=line_marker("orange"),
    )
    actual_anomalies = go.Scatter(
        name="anomaly actual",
        showlegend=False,
        x=timeline,
        y=flagged_actuals,
        mode='markers',
        xaxis='x2',
        yaxis='y2',
        marker=anomaly_marker(),
    )

    axis_defaults = dict(
        showline=True,
        zeroline=False,
        showgrid=True,
        mirror=True,
        ticklen=4,
        gridcolor='#ffffff',
        tickfont=dict(size=10),
    )
    layout = dict(
        width=1000,
        height=865,
        autosize=False,
        title=metric_name,
        margin=dict(t=75),
        showlegend=True,
        xaxis1={**axis_defaults, 'domain': [0, 1], 'anchor': 'y1', 'showticklabels': True},
        xaxis2={**axis_defaults, 'domain': [0, 1], 'anchor': 'y2', 'showticklabels': True},
        yaxis1={**axis_defaults, 'domain': [2 * 0.21 + 0.20 + 0.09, 1], 'anchor': 'x1',
                'hoverformat': '.2f'},
        yaxis2={**axis_defaults, 'domain': [0.21 + 0.12, 2 * 0.31 + 0.02], 'anchor': 'x2',
                'hoverformat': '.2f'},
    )

    # Trace order matters: 'tonexty' fills towards the preceding trace.
    traces = [summary_table, error_anomalies, actual_anomalies,
              upper_band, lower_band, actual_line, predicted_line,
              moving_average, error_line]
    figure = go.Figure(data=traces, layout=layout)
    st.plotly_chart(figure)
# Function to plot temperature anomalies
def plot_temp(anomalies, results, test, title):
    """Plot the ``tv delta`` series with flagged anomalies and show it in Streamlit.

    Args:
        anomalies: One x-coordinate per observation, as produced by
            ``to_xcoords``: 0 for unflagged points, a positive position for
            flagged ones.
        results: Unused; kept so existing callers keep working.
        test: Frame with a ``tv delta`` column.
        title: Title placed above the plot.

    The visible window (x from 3616 to 6420, y from 0 to 350) and the two
    summer markers at x=3895 and x=6141 are fixed.
    """
    fig, ax = plt.subplots(figsize=(20, 10))
    try:
        sns.set_style('darkgrid')
        tv_delta = test['tv delta']
        sns.lineplot(data=tv_delta[3616:6420], color='green', ax=ax)

        # Only flagged points need a marker. Unflagged ones carry x == 0, far
        # outside the visible window, so drawing them was pure overhead.
        # Each x-coordinate stays paired with the value at its list position.
        flagged = [(position, x) for position, x in enumerate(anomalies) if x]
        if flagged:
            ax.vlines(
                x=[x for _, x in flagged],
                ymin=[int(tv_delta[position] - 25) for position, _ in flagged],
                ymax=[int(tv_delta[position] + 25) for position, _ in flagged],
                colors='red', ls='-', lw=1,
            )

        ax.vlines(x=3895, ymin=0, ymax=350, colors='grey', ls='--', lw=1)
        ax.text(x=3895, y=325, s=' Start of Summer', alpha=1, color='black')
        ax.vlines(x=6141, ymin=0, ymax=350, colors='grey', ls='--', lw=1)
        ax.text(x=6141, y=325, s=' End of Summer', alpha=1, color='black')
        ax.set_xlim(3616, 6420)
        ax.set_ylim(0, 350)
        ax.set_title(title, fontsize=20)
        st.pyplot(fig)
    finally:
        # Figures created through pyplot stay registered until closed. Without
        # this, every rerun leaks one, and it remains the "current" figure
        # that later implicit pyplot calls would draw on.
        plt.close(fig)
# Function to plot changepoint probabilities
def plot_changepoint_probabilities(results, cumcutoff, title):
    """Plot change-point scores against their cutoff on a log scale in Streamlit.

    Args:
        results: Sequence of scores, plotted against its positional index.
        cumcutoff: Sequence of cutoff values, plotted on the same axes.
        title: Title placed above the plot.

    The visible window is fixed to x in [3616, 6420] and y in [0.00005, 1].
    """
    sns.set(rc={'figure.figsize': (20, 5)})
    # Draw on a dedicated figure. The previous code used pyplot's implicit
    # "current" figure, which is whatever figure was created last (for
    # example one that another plot function had already rendered), and then
    # called st.pyplot() without a figure, which Streamlit deprecates because
    # the global figure is not thread-safe.
    fig, ax = plt.subplots(figsize=(20, 5))
    try:
        sns.lineplot(data=results, ax=ax)
        sns.lineplot(data=np.asarray(cumcutoff), ax=ax)
        ax.set_xlim(3616, 6420)
        ax.set_ylim(.00005, 1)
        ax.set_yscale('log')
        ax.set_title(title, fontsize=20)
        st.pyplot(fig)
    finally:
        # Release the figure so repeated reruns do not accumulate open figures.
        plt.close(fig)
# Function to detect anomalies using different models
def detect_anomalies(results, window_size):
    """Flag scores that fall outside a trailing mean +/- 2 standard deviations.

    A window seeded with ``window_size`` zeros is slid over ``results``. At
    each step the current score is appended, the mean and standard deviation
    of the ``window_size + 1`` values are taken, the score is tested against
    the two-sigma interval, and the oldest value is dropped.

    Args:
        results: Iterable of numeric scores.
        window_size: Number of previous values kept in the window.

    Returns:
        A tuple ``(anomalies, cumcutoff)``: ``anomalies`` is the per-score
        0/1 outlier flag converted by ``to_xcoords``; ``cumcutoff`` is the
        upper bound (mean + 2 * std) used at each step.
    """
    # One buffer is enough: the two lists kept before always held the same values.
    window = [0] * window_size
    outlier_flags = []
    cumcutoff = []
    for score in results:
        window.append(score)
        # Compute the statistics once per step instead of three times.
        centre = np.mean(window)
        spread = 2 * np.std(window)
        upper = centre + spread
        cumcutoff.append(upper)
        outlier_flags.append(0 if centre - spread <= score <= upper else 1)
        window.pop(0)
    return to_xcoords(outlier_flags), cumcutoff
# Define a function to convert outlier detection to x-coordinates
def to_xcoords(outlierbin):
    """Turn per-point outlier flags into x-coordinates.

    Each flag is multiplied by its 1-based position, so an unflagged point
    (0) stays 0 and a flagged point (1) becomes its position in the sequence.

    Args:
        outlierbin: Iterable of flags, one per observation.

    Returns:
        A list with one entry per flag.
    """
    return [position * flag for position, flag in enumerate(outlierbin, start=1)]
# Main function
def _render_prediction_page():
    """Collect the six model inputs and show the predicted TV delta."""
    st.title('TV Delta Prediction')
    st.write('Enter the values below to predict TV delta.')
    temperature = st.slider('Temperature', min_value=-20.0, max_value=40.0, value=5.0)
    holiday = st.selectbox('Holiday', options=['No', 'Yes'], index=1)
    prev_value = st.number_input('Previous Value', value=29.0)
    twice_prev_value = st.number_input('Twice Previous Value', value=41.0)
    day_shift = st.slider('Day Shift', min_value=-20.0, max_value=20.0, value=19.0)
    month_shift = st.slider('Month Shift', min_value=-200.0, max_value=200.0, value=123.0)
    holiday_binary = 1 if holiday == 'Yes' else 0
    if st.button('Predict'):
        result = predict_tv_delta(temperature, holiday_binary, prev_value,
                                  twice_prev_value, day_shift, month_shift)
        st.write('Predicted TV delta:', result)


def _render_anomaly_page():
    """Classify anomalies in the uploaded test predictions and plot them."""
    st.title("Anomaly Detection and Analysis")
    st.sidebar.title("Settings")
    train_file = st.sidebar.file_uploader("Upload Training Data CSV", type=['csv'])
    test_file = st.sidebar.file_uploader("Upload Testing Data CSV", type=['csv'])
    train_predict_file = st.sidebar.file_uploader("Upload Training Predictions CSV", type=['csv'])
    test_predict_file = st.sidebar.file_uploader("Upload Testing Predictions CSV", type=['csv'])
    # The page still waits for all four uploads, as before.
    if not (train_file and test_file and train_predict_file and test_predict_file):
        return

    # Only the testing predictions feed the analysis. The other three files
    # used to be parsed and then discarded, so a malformed unused file could
    # crash the page for no benefit.
    test_predict = pd.read_csv(test_predict_file)
    required_columns = ['Timestamp', 'value delta', 'pred']
    missing = [column for column in required_columns if column not in test_predict.columns]
    if missing:
        st.error("Testing Predictions CSV is missing column(s): " + ", ".join(missing))
        return

    test_predict['Timestamp'] = pd.to_datetime(test_predict['Timestamp'])
    predicted_df = test_predict[required_columns].copy()
    predicted_df.columns = ['load_date', 'actuals', 'predicted']

    # The '14D' rolling window needs a monotonic datetime index. Order the
    # rows only when they are not monotonic already, so inputs that worked
    # before are processed exactly as before.
    dates = predicted_df['load_date']
    if not (dates.is_monotonic_increasing or dates.is_monotonic_decreasing):
        predicted_df = predicted_df.sort_values('load_date', kind='mergesort')

    predicted_df['load_date2'] = predicted_df['load_date']
    predicted_df = predicted_df.set_index('load_date2')
    classify_df = detect_classify_anomalies(predicted_df, '14D')

    st.markdown("---")
    plot_anomaly(classify_df, "Metric Name")
    st.markdown("---")
    st.subheader("Cluster Counts")
    classify_df['cluster'] = np.where(classify_df['color'] == 3, 'Outlier', 'Non-Outlier')
    cluster_counts = classify_df['cluster'].value_counts()
    st.write(cluster_counts)


def _render_temperature_page():
    """Run SST change-point detection on the bundled test predictions and plot it."""
    st.title("Temperature Anomaly Detection")
    # 'train_predictions.csv' used to be loaded here as well, but it was never
    # used, so a missing training file no longer breaks the page.
    try:
        test = pd.read_csv('test_predictions.csv')
    except FileNotFoundError:
        st.error("Could not find 'test_predictions.csv' in the working directory.")
        return
    test = test.drop('Timestamp', axis=1)

    model = banpei.SST(w=50)
    results = model.detect(test['tv delta'], is_lanczos=True)

    # (label, window size) for each model variant; titles match the previous
    # hand-written calls.
    variants = (
        ("Base", 50),
        ("High Sensitivity", 24),
        ("Less Sensitivity", 31 * 24),
    )
    for label, window_size in variants:
        anomalies, cumcutoff = detect_anomalies(results, window_size)
        plot_temp(anomalies, results, test, f"Anomalies - {label} Model")
        plot_changepoint_probabilities(results, cumcutoff,
                                       f"{label} Model Changepoint Probabilities")


def main():
    """Configure the Streamlit page and render every page selected in the sidebar.

    Pages are rendered in a fixed order (prediction, anomaly analysis,
    temperature), regardless of the order in which they were selected.
    """
    st.set_page_config(layout="wide")
    st.title("Combined Streamlit App")

    pages = {
        "TV Delta Prediction": _render_prediction_page,
        "Anomaly Detection and Analysis": _render_anomaly_page,
        "Temperature Anomaly Detection": _render_temperature_page,
    }
    # Page selection
    selected_page = st.sidebar.multiselect("Select Page", list(pages))
    for page_name, render_page in pages.items():
        if page_name in selected_page:
            render_page()


if __name__ == "__main__":
    main()