Spaces:

Dhanushlevi
/

ANAMOLY_DETECTION

Runtime error

App Files Files Community

ANAMOLY_DETECTION / app.py

Dhanushlevi

Rename comp.py to app.py

055c451 verified about 2 years ago

raw

history blame contribute delete

14.5 kB

	import streamlit as st
	import pickle
	import pandas as pd
	import numpy as np
	import banpei
	import os
	import plotly.graph_objs as go
	import matplotlib.pyplot as plt
	import seaborn as sns
	import random

	# Load the pre-trained regressor once, when the script is (re)run.
	# NOTE: unpickling executes code embedded in the file, so this pickle must
	# come from a trusted source.
	with open('ANOMALY_random_forest_regressor.pickle', 'rb') as model_file:
	    loaded_regressor = pickle.load(model_file)

	# Function to predict TV delta
	def predict_tv_delta(temperature, holiday, prev_value, twice_prev_value, day_shift, month_shift):
	    """Predict one TV delta value from manually entered feature values.

	    The six arguments are placed, in this order, into a one-row DataFrame
	    with the columns 'Temperature', 'holiday', 'prev value',
	    'twice prev value', 'day shift' and 'month shift', which is handed to the
	    module-level ``loaded_regressor``.

	    Returns:
	        The first (and only) element of the regressor's prediction.
	    """
	    column_names = ('Temperature', 'holiday', 'prev value',
	                    'twice prev value', 'day shift', 'month shift')
	    inputs = (temperature, holiday, prev_value,
	              twice_prev_value, day_shift, month_shift)
	    # One single-element list per column gives a frame with exactly one row.
	    single_row = pd.DataFrame({name: [value] for name, value in zip(column_names, inputs)})
	    prediction = loaded_regressor.predict(single_row[list(column_names)])
	    return prediction[0]

	# Function to detect and classify anomalies
	def detect_classify_anomalies(df, window):
	    """Score forecast errors against rolling bands and flag anomalies.

	    Args:
	        df: DataFrame with 'actuals', 'predicted' and 'load_date' columns.
	            It is modified in place: +/-inf and NaN are replaced by 0 and the
	            derived columns listed below are added.
	        window: Rolling window passed to ``Series.rolling`` (an integer row
	            count or an offset string such as '14D').

	    Returns:
	        A copy of ``df`` sorted by 'load_date' (newest first) with
	        'load_date' parsed as datetime and these extra columns:
	        'error', 'percentage_change', 'meanval', 'deviation', the band
	        columns '-3s'..'3s', 'impact' (position 0-7 of the error among its
	        own row's sorted band values), 'color' (severity 0-3), 'region'
	        ('NEGATIVE'/'POSITIVE') and 'anomaly_points' (the error where
	        severity is 3, NaN elsewhere).
	    """
	    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
	    df.replace([np.inf, -np.inf], np.nan, inplace=True)
	    df.fillna(0, inplace=True)

	    df['error'] = df['actuals'] - df['predicted']
	    # NOTE: rows where 'actuals' is 0 yield inf/NaN here.
	    df['percentage_change'] = (df['error'] / df['actuals']) * 100

	    rolling_error = df['error'].rolling(window=window)
	    df['meanval'] = rolling_error.mean()
	    df['deviation'] = rolling_error.std()

	    # The column labels say 3s/2s/1s but the widths actually applied are
	    # 2, 1.75 and 1.5 rolling standard deviations.
	    for label, width in (('3s', 2), ('2s', 1.75), ('1s', 1.5)):
	        df['-' + label] = df['meanval'] - (width * df['deviation'])
	        df[label] = df['meanval'] + (width * df['deviation'])

	    band_columns = ['error', '-3s', '-2s', '-1s', 'meanval', '1s', '2s', '3s']
	    ordered = np.sort(df[band_columns].to_numpy(dtype=float), axis=1)
	    errors = df['error'].to_numpy(dtype=float)
	    # Locate each error inside ITS OWN row of sorted band values. Searching
	    # the whole matrix for the value (as done previously) returned the column
	    # of the first row-major match, which could belong to an earlier row.
	    # np.sort puts NaN last, so rows whose bands are still NaN get impact 0.
	    df['impact'] = (ordered == errors[:, np.newaxis]).argmax(axis=1)

	    severity = {0: 3, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 2, 7: 3}
	    region = {0: "NEGATIVE", 1: "NEGATIVE", 2: "NEGATIVE", 3: "NEGATIVE",
	              4: "POSITIVE", 5: "POSITIVE", 6: "POSITIVE", 7: "POSITIVE"}
	    df['color'] = df['impact'].map(severity)
	    df['region'] = df['impact'].map(region)
	    df['anomaly_points'] = np.where(df['color'] == 3, df['error'], np.nan)

	    df = df.sort_values(by='load_date', ascending=False)
	    df['load_date'] = pd.to_datetime(df['load_date'], format="%Y-%m-%d %H:%M:%S")
	    return df

	# Function to plot anomaly data
	def plot_anomaly(df, metric_name):
	    """Render the anomaly dashboard for a classified DataFrame in Streamlit.

	    The figure has three stacked parts: an error panel (axes x1/y1) with the
	    rolling mean, the outer bands and the anomaly markers; an
	    actual-vs-predicted panel (axes x2/y2) with the anomalous actuals
	    highlighted; and a table at the bottom whose rows are coloured by
	    severity.

	    Args:
	        df: DataFrame providing the columns 'load_date', 'actuals',
	            'predicted', 'percentage_change', 'color', 'error', 'meanval',
	            '3s', '-3s' and 'anomaly_points'.
	        metric_name: Used as the figure title.
	    """
	    dates = df.load_date

	    # Actual values at the rows flagged as anomalies, NaN everywhere else.
	    bool_array = (abs(df['anomaly_points']) > 0)
	    actuals = df["actuals"][-len(bool_array):]
	    anomaly_points = bool_array * actuals
	    anomaly_points[anomaly_points == 0] = np.nan

	    color_map = {0: 'rgb(228, 222, 249)', 1: "yellow", 2: "orange", 3: "red"}
	    table_columns = ['load_date', 'actuals', 'predicted', 'percentage_change', 'color']
	    rounded = df.round(3)  # round once instead of once per table column
	    table = go.Table(
	        domain=dict(x=[0, 1], y=[0, 0.3]),
	        columnwidth=[1, 2],
	        header=dict(
	            height=20,
	            values=[['<b>Date</b>'], ['<b>Actual Values </b>'],
	                    ['<b>Predicted</b>'], ['<b>% Difference</b>'], ['<b>Severity (0-3)</b>']],
	            font=dict(color=['rgb(45, 45, 45)'] * 5, size=14),
	            fill=dict(color='#d562be')),
	        cells=dict(
	            values=[rounded[k].tolist() for k in table_columns],
	            line=dict(color='#506784'),
	            align=['center'] * 5,
	            font=dict(color=['rgb(40, 40, 40)'] * 5, size=12),
	            suffix=[None] + [''] + [''] + ['%'] + [''],
	            height=27,
	            fill=dict(color=[df['color'].map(color_map)])))

	    anomalies = go.Scatter(
	        name="Anomaly",
	        x=dates,
	        xaxis='x1',
	        yaxis='y1',
	        y=df['anomaly_points'],
	        mode='markers',
	        marker=dict(color='red', size=11, line=dict(color="red", width=2)))
	    upper_bound = go.Scatter(
	        hoverinfo="skip",
	        x=dates,
	        showlegend=False,
	        xaxis='x1',
	        yaxis='y1',
	        y=df['3s'],
	        marker=dict(color="#444"),
	        line=dict(color=('rgb(23, 96, 167)'), width=2, dash='dash'),
	        fillcolor='rgb(68, 68, 68)',
	        fill='tonexty')
	    lower_bound = go.Scatter(
	        name='Confidence',
	        x=dates,
	        xaxis='x1',
	        yaxis='y1',
	        y=df['-3s'],
	        marker=dict(color="#444"),
	        line=dict(color=('rgb(23, 96, 167)'), width=2, dash='dash'),
	        fillcolor='rgb(68, 68, 68)',
	        fill='tonexty')
	    actuals_trace = go.Scatter(
	        name='Actuals',
	        x=dates,
	        y=df['actuals'],
	        xaxis='x2', yaxis='y2',
	        mode='lines',
	        marker=dict(size=12, line=dict(width=1), color="blue"))
	    predicted_trace = go.Scatter(
	        name='Predicted',
	        x=dates,
	        y=df['predicted'],
	        xaxis='x2', yaxis='y2',
	        mode='lines',
	        marker=dict(size=12, line=dict(width=1), color="orange"))
	    error_trace = go.Scatter(
	        name="Error",
	        x=dates, y=df['error'],
	        xaxis='x1',
	        yaxis='y1',
	        mode='lines',
	        marker=dict(size=12, line=dict(width=1), color="red"),
	        text="Error")
	    anomalies_map = go.Scatter(
	        name="anomaly actual",
	        showlegend=False,
	        x=dates,
	        y=anomaly_points,
	        mode='markers',
	        xaxis='x2',
	        yaxis='y2',
	        marker=dict(color="red", size=11, line=dict(color="red", width=2)))
	    moving_average = go.Scatter(
	        name="Moving A",
	        x=dates,
	        y=df['meanval'],
	        mode='lines',
	        xaxis='x1',
	        yaxis='y1',
	        marker=dict(size=12, line=dict(width=1), color="green"),
	        text="Moving A")

	    axis = dict(
	        showline=True,
	        zeroline=False,
	        showgrid=True,
	        mirror=True,
	        ticklen=4,
	        gridcolor='#ffffff',
	        tickfont=dict(size=10))
	    # Vertical layout: table in [0, 0.3], actual/predicted panel in
	    # [0.33, 0.64], error panel in [0.71, 1]. The multiplications below were
	    # missing their '*' (and the dict unpacking one of its '*'), which made
	    # these two lines syntax errors.
	    layout = dict(
	        width=1000,
	        height=865,
	        autosize=False,
	        title=metric_name,
	        margin=dict(t=75),
	        showlegend=True,
	        xaxis1=dict(axis, domain=[0, 1], anchor='y1', showticklabels=True),
	        xaxis2=dict(axis, domain=[0, 1], anchor='y2', showticklabels=True),
	        yaxis1=dict(axis, domain=[2 * 0.21 + 0.20 + 0.09, 1], anchor='x1', hoverformat='.2f'),
	        yaxis2=dict(axis, domain=[0.21 + 0.12, 2 * 0.31 + 0.02], anchor='x2', hoverformat='.2f'))

	    # Trace order matters: fill='tonexty' fills towards the preceding trace.
	    fig = go.Figure(
	        data=[table, anomalies, anomalies_map,
	              upper_bound, lower_bound, actuals_trace, predicted_trace,
	              moving_average, error_trace],
	        layout=layout)
	    st.plotly_chart(fig)

	# Function to plot temperature anomalies
	def plot_temp(anomalies, results, test, title):
	    """Plot the 'tv delta' series with red markers at anomaly x-coordinates.

	    Args:
	        anomalies: x-coordinates of the vertical anomaly markers, one entry
	            per scored point (as produced by ``detect_anomalies``).
	        results: Unused; kept so existing callers keep working.
	        test: DataFrame with a 'tv delta' column.
	        title: Title shown above the plot.

	    The x-range (3616-6420), the y-range (0-350) and the two season markers
	    are hard-coded.
	    """
	    # Style must be set before the axes exist; it only affects axes created
	    # afterwards.
	    sns.set_style('darkgrid')
	    fig, ax = plt.subplots(figsize=(20, 10))
	    tv_delta = test['tv delta']
	    sns.lineplot(data=tv_delta[3616:6420], color='green', ax=ax)

	    # The i-th marker spans +/-25 around the i-th series value.
	    ymins = [int(tv_delta[i] - 25) for i in range(len(anomalies))]
	    ymaxs = [int(tv_delta[i] + 25) for i in range(len(anomalies))]
	    ax.vlines(x=anomalies, ymin=ymins, ymax=ymaxs, colors='red', ls='-', lw=1)

	    for x_pos, label in ((3895, ' Start of Summer'), (6141, ' End of Summer')):
	        ax.vlines(x=x_pos, ymin=0, ymax=350, colors='grey', ls='--', lw=1)
	        ax.text(x=x_pos, y=325, s=label, alpha=1, color='black')

	    ax.set_xlim(3616, 6420)
	    ax.set_ylim(0, 350)
	    ax.set_title(title, fontsize=20)
	    st.pyplot(fig)
	    # Release the figure; otherwise one accumulates per call and per rerun,
	    # and it stays matplotlib's "current" figure for later implicit plotting.
	    plt.close(fig)

	# Function to plot changepoint probabilities
	def plot_changepoint_probabilities(results, cumcutoff, title):
	    """Plot change-point scores and their running cutoff on a log y-axis.

	    Args:
	        results: Sequence of change-point scores.
	        cumcutoff: Sequence of cutoff values, one per score.
	        title: Title shown above the plot.

	    The x-range (3616-6420) and y-range (0.00005-1) are hard-coded.
	    """
	    sns.set(rc={'figure.figsize': (20, 5)})
	    # Draw on a dedicated figure. Plotting through the implicit pyplot state
	    # would reuse whichever figure is current (for example one left open by
	    # an earlier plot), and argument-less st.pyplot() is deprecated.
	    fig, ax = plt.subplots(figsize=(20, 5))
	    sns.lineplot(data=results, ax=ax)
	    sns.lineplot(data=np.asarray(cumcutoff), ax=ax)
	    ax.set_xlim(3616, 6420)
	    ax.set_ylim(.00005, 1)
	    ax.set_yscale('log')
	    ax.set_title(title, fontsize=20)
	    st.pyplot(fig)
	    plt.close(fig)

	# Function to detect anomalies using different models
	def detect_anomalies(results, window_size):
	    """Flag scores that leave a trailing mean +/- 2*std band.

	    The window starts as ``window_size`` zeros. For every score, the score is
	    added to the window, the mean and standard deviation of those
	    ``window_size + 1`` values are computed, and the score is an outlier when
	    it lies outside [mean - 2*std, mean + 2*std]. The oldest value is then
	    dropped.

	    Args:
	        results: Iterable of numeric scores.
	        window_size: Number of previous values kept in the window.

	    Returns:
	        A tuple ``(anomalies, cumcutoff)``: ``anomalies`` is the outlier
	        flags converted by ``to_xcoords``; ``cumcutoff`` is the upper band
	        value (mean + 2*std) for every score.
	    """
	    from collections import deque

	    # A single bounded deque replaces the two identical lists and the O(n)
	    # pop(0): with maxlen = window_size + 1 the oldest value is dropped
	    # automatically on the append that follows each evaluation.
	    window = deque([0] * window_size, maxlen=max(window_size, 0) + 1)
	    outlierbin = []
	    cumcutoff = []

	    for score in results:
	        window.append(score)
	        centre = np.mean(window)
	        spread = 2 * np.std(window)
	        upper = centre + spread
	        cumcutoff.append(upper)
	        outlierbin.append(0 if centre - spread <= score <= upper else 1)

	    anomalies = to_xcoords(outlierbin)

	    return anomalies, cumcutoff

	# Define a function to convert outlier detection to x-coordinates
	def to_xcoords(outlierbin):
	    """Turn per-point outlier flags into x-coordinates.

	    Each entry is multiplied by its 1-based position, so a flag of 1 becomes
	    that position and a flag of 0 stays 0.
	    """
	    return [position * flag for position, flag in enumerate(outlierbin, start=1)]

	def _render_prediction_page():
	    """Collect the six feature values and show the predicted TV delta."""
	    st.title('TV Delta Prediction')
	    st.write('Enter the values below to predict TV delta.')

	    temperature = st.slider('Temperature', min_value=-20.0, max_value=40.0, value=5.0)
	    holiday = st.selectbox('Holiday', options=['No', 'Yes'], index=1)
	    prev_value = st.number_input('Previous Value', value=29.0)
	    twice_prev_value = st.number_input('Twice Previous Value', value=41.0)
	    day_shift = st.slider('Day Shift', min_value=-20.0, max_value=20.0, value=19.0)
	    month_shift = st.slider('Month Shift', min_value=-200.0, max_value=200.0, value=123.0)

	    # The model takes the holiday flag as 1/0 rather than 'Yes'/'No'.
	    holiday_binary = int(holiday == 'Yes')

	    if not st.button('Predict'):
	        return
	    result = predict_tv_delta(temperature, holiday_binary, prev_value, twice_prev_value, day_shift, month_shift)
	    st.write('Predicted TV delta:', result)


	def _render_classification_page():
	    """Classify uploaded test predictions and show the dashboard and counts."""
	    st.title("Anomaly Detection and Analysis")
	    st.sidebar.title("Settings")
	    train_file = st.sidebar.file_uploader("Upload Training Data CSV", type=['csv'])
	    test_file = st.sidebar.file_uploader("Upload Testing Data CSV", type=['csv'])
	    train_predict_file = st.sidebar.file_uploader("Upload Training Predictions CSV", type=['csv'])
	    test_predict_file = st.sidebar.file_uploader("Upload Testing Predictions CSV", type=['csv'])

	    # Nothing is computed until all four files have been provided.
	    if not (train_file and test_file and train_predict_file and test_predict_file):
	        return

	    # All four files are parsed, although only the testing predictions feed
	    # the analysis below.
	    train = pd.read_csv(train_file)
	    test = pd.read_csv(test_file)
	    train_predict = pd.read_csv(train_predict_file)
	    test_predict = pd.read_csv(test_predict_file)

	    for frame in (train_predict, test_predict):
	        frame['Timestamp'] = pd.to_datetime(frame['Timestamp'])

	    predicted_df = test_predict[['Timestamp', 'value delta', 'pred']].copy()
	    predicted_df.columns = ['load_date', 'actuals', 'predicted']
	    # Index by a copy of the timestamp so the offset rolling window works
	    # while 'load_date' stays available as a column.
	    predicted_df['load_date2'] = predicted_df['load_date']
	    predicted_df = predicted_df.set_index('load_date2')

	    classify_df = detect_classify_anomalies(predicted_df, '14D')

	    st.markdown("---")
	    plot_anomaly(classify_df, "Metric Name")
	    st.markdown("---")

	    st.subheader("Cluster Counts")
	    classify_df['cluster'] = np.where(classify_df['color'] == 3, 'Outlier', 'Non-Outlier')
	    st.write(classify_df['cluster'].value_counts())


	def _render_temperature_page():
	    """Score the bundled test predictions and plot three sensitivity levels."""
	    st.title("Temperature Anomaly Detection")

	    train = pd.read_csv('train_predictions.csv')
	    train.drop('Timestamp', axis=1, inplace=True)
	    test = pd.read_csv('test_predictions.csv')
	    test.drop('Timestamp', axis=1, inplace=True)

	    model = banpei.SST(w=50)
	    results = model.detect(test['tv delta'], is_lanczos=True)

	    # (window size, anomaly plot title, cutoff plot title)
	    runs = (
	        (50, "Anomalies - Base Model", "Base Model Changepoint Probabilities"),
	        (24, "Anomalies - High Sensitivity Model", "High Sensitivity Model Changepoint Probabilities"),
	        (31*24, "Anomalies - Less Sensitivity Model", "Less Sensitivity Model Changepoint Probabilities"),
	    )
	    for window_size, anomaly_title, cutoff_title in runs:
	        anomalies, cumcutoff = detect_anomalies(results, window_size)
	        plot_temp(anomalies, results, test, anomaly_title)
	        plot_changepoint_probabilities(results, cumcutoff, cutoff_title)


	# Main function
	def main():
	    """Configure the page and render every page picked in the sidebar."""
	    st.set_page_config(layout="wide")
	    st.title("Combined Streamlit App")

	    # Page selection
	    selected_page = st.sidebar.multiselect("Select Page", ["TV Delta Prediction", "Anomaly Detection and Analysis", "Temperature Anomaly Detection"])

	    if "TV Delta Prediction" in selected_page:
	        _render_prediction_page()

	    if "Anomaly Detection and Analysis" in selected_page:
	        _render_classification_page()

	    if "Temperature Anomaly Detection" in selected_page:
	        _render_temperature_page()

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()