Spaces:

etuncer
/

website-traffic-forecasting

Sleeping

App Files Files Community

website-traffic-forecasting / src /app.py

etuncer

Update src/app.py

03d8270 verified 6 months ago

raw

history blame contribute delete

8.24 kB

	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	from statsmodels.tsa.seasonal import seasonal_decompose
	from statsmodels.graphics.tsaplots import plot_pacf
	import warnings
	import joblib
	from datetime import timedelta

	# Suppress all Python warnings and set the default matplotlib/seaborn look
	warnings.filterwarnings('ignore')
	plt.rcParams.update({'figure.figsize': (12, 8)})
	sns.set_style('darkgrid')

	# Streamlit page settings: browser-tab title, icon and wide layout
	_page_settings = {
	    'page_title': "Website Traffic Forecasting",
	    'page_icon': "📈",
	    'layout': "wide",
	}
	st.set_page_config(**_page_settings)

	# Load data and model
	@st.cache_data
	def load_data():
	    """Load the website-traffic CSV and parse its ``Date`` column.

	    The file is looked up first at the working-directory-relative path
	    ``src/Thecleverprogrammer.csv`` and then in the directory that contains
	    this script, so the app also starts when Streamlit is launched from a
	    different working directory.

	    Returns:
	        The loaded ``DataFrame`` with ``Date`` converted by
	        ``pd.to_datetime(..., format='%d/%m/%Y')``, or ``None`` (after an
	        ``st.error`` message) when the file exists at neither location.

	    Raises:
	        Only ``FileNotFoundError`` is handled here; any other error from
	        reading or date parsing propagates to the caller.
	    """
	    # Local import keeps this block self-contained.
	    from pathlib import Path

	    file_name = 'Thecleverprogrammer.csv'
	    candidates = (
	        Path('src') / file_name,                      # original location
	        Path(__file__).resolve().parent / file_name,  # next to this script
	    )

	    for csv_path in candidates:
	        try:
	            df = pd.read_csv(csv_path)
	        except FileNotFoundError:
	            continue
	        # Dates are stored day-first (e.g. 31/12/2021).
	        df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')
	        return df

	    st.error("Data file 'Thecleverprogrammer.csv' not found. Please ensure the file is in the same directory.")
	    # NOTE(review): this None is returned from an st.cache_data function, so it
	    # is presumably cached until the cache is cleared -- confirm if the file
	    # can appear while the app is running.
	    return None

	@st.cache_resource
	def load_model():
	    """Load the fitted forecasting model saved with joblib.

	    The file is looked up first at the working-directory-relative path
	    ``src/website_traffic_model.pkl`` and then in the directory that contains
	    this script, so the app also starts when Streamlit is launched from a
	    different working directory.

	    Returns:
	        The object returned by ``joblib.load``, or ``None`` (after an
	        ``st.error`` message) when the file exists at neither location.

	    Raises:
	        Only ``FileNotFoundError`` is handled here; any other loading error
	        propagates to the caller.
	    """
	    # Local import keeps this block self-contained.
	    from pathlib import Path

	    file_name = 'website_traffic_model.pkl'
	    candidates = (
	        Path('src') / file_name,                      # original location
	        Path(__file__).resolve().parent / file_name,  # next to this script
	    )

	    for model_path in candidates:
	        try:
	            # NOTE: joblib.load unpickles the file, which can execute arbitrary
	            # code -- only ship model files from a trusted source.
	            return joblib.load(model_path)
	        except FileNotFoundError:
	            continue

	    st.error("Model file 'website_traffic_model.pkl' not found. Please run the notebook first to generate the model.")
	    return None

	# Main app: page header, then four tabs (overview, analysis, forecast, model
	# diagnostics). Everything below re-executes on each Streamlit rerun, so every
	# Matplotlib figure is closed right after it is rendered; otherwise figures
	# created with plt.subplots() pile up in pyplot's global figure registry.
	st.title("📈 Website Traffic Forecasting Dashboard")
	st.markdown("Predict website traffic patterns using SARIMA time series analysis")

	# Load data and model (each loader returns None when its file is missing)
	df = load_data()
	model = load_model()

	if df is not None and model is not None:
	    # Sidebar for controls
	    st.sidebar.header("Controls")

	    # Forecast horizon in days, adjustable in steps of 7
	    prediction_days = st.sidebar.slider(
	        "Number of days to predict:",
	        min_value=7,
	        max_value=120,
	        value=60,
	        step=7
	    )

	    # Show data info
	    st.sidebar.subheader("Dataset Info")
	    st.sidebar.write(f"Total records: {len(df)}")
	    st.sidebar.write(f"Date range: {df['Date'].min().strftime('%Y-%m-%d')} to {df['Date'].max().strftime('%Y-%m-%d')}")
	    st.sidebar.write(f"Average daily views: {df['Views'].mean():.0f}")

	    # Main content tabs
	    tab1, tab2, tab3, tab4 = st.tabs(["📊 Data Overview", "🔍 Analysis", "🔮 Predictions", "📈 Model Performance"])

	    with tab1:
	        st.header("Data Overview")

	        # Key metrics
	        col1, col2, col3, col4 = st.columns(4)
	        with col1:
	            st.metric("Total Views", f"{df['Views'].sum():,}")
	        with col2:
	            st.metric("Average Daily Views", f"{df['Views'].mean():.0f}")
	        with col3:
	            st.metric("Peak Views", f"{df['Views'].max():,}")
	        with col4:
	            st.metric("Min Views", f"{df['Views'].min():,}")

	        # Time series plot
	        fig = px.line(df, x='Date', y='Views', title='Website Traffic Over Time')
	        fig.update_layout(
	            xaxis_title="Date",
	            yaxis_title="Views"
	        )
	        st.plotly_chart(fig, width='stretch', config={'displayModeBar': True, 'displaylogo': False})

	        # Data table: the last 10 rows of the loaded frame
	        st.subheader("Raw Data")
	        st.dataframe(df.tail(10), width='stretch')

	    with tab2:
	        st.header("Time Series Analysis")

	        # Seasonal decomposition (multiplicative model, 30-observation period)
	        st.subheader("Seasonal Decomposition")
	        result = seasonal_decompose(df['Views'], model='multiplicative', period=30)

	        # Create subplots for decomposition: one row per component
	        fig = make_subplots(
	            rows=4, cols=1,
	            subplot_titles=('Original', 'Trend', 'Seasonal', 'Residual'),
	            vertical_spacing=0.08
	        )

	        fig.add_trace(go.Scatter(x=df['Date'], y=result.observed, name='Original'), row=1, col=1)
	        fig.add_trace(go.Scatter(x=df['Date'], y=result.trend, name='Trend'), row=2, col=1)
	        fig.add_trace(go.Scatter(x=df['Date'], y=result.seasonal, name='Seasonal'), row=3, col=1)
	        fig.add_trace(go.Scatter(x=df['Date'], y=result.resid, name='Residual'), row=4, col=1)

	        fig.update_layout(height=800, showlegend=False)
	        st.plotly_chart(fig, width='stretch')

	        # Autocorrelation analysis
	        col1, col2 = st.columns(2)

	        with col1:
	            st.subheader("Autocorrelation Plot")
	            fig, ax = plt.subplots(figsize=(10, 6))
	            pd.plotting.autocorrelation_plot(df['Views'], ax=ax)
	            st.pyplot(fig)
	            plt.close(fig)  # release the figure once Streamlit has rendered it

	        with col2:
	            st.subheader("Partial Autocorrelation Plot")
	            fig, ax = plt.subplots(figsize=(10, 6))
	            plot_pacf(df['Views'], lags=50, ax=ax)
	            st.pyplot(fig)
	            plt.close(fig)  # release the figure once Streamlit has rendered it

	    with tab3:
	        st.header("Traffic Predictions")

	        # Generate predictions for the prediction_days positions that follow
	        # the last row of df (start and end are both inclusive here)
	        predictions = model.predict(len(df), len(df) + prediction_days - 1)

	        # Create future dates: daily steps starting the day after the last record
	        last_date = df['Date'].max()
	        future_dates = pd.date_range(start=last_date + timedelta(days=1), periods=prediction_days, freq='D')

	        # Create prediction dataframe
	        pred_df = pd.DataFrame({
	            'Date': future_dates,
	            'Predicted_Views': predictions
	        })

	        # Combined plot
	        fig = go.Figure()

	        # Historical data
	        fig.add_trace(go.Scatter(
	            x=df['Date'],
	            y=df['Views'],
	            mode='lines',
	            name='Historical Data',
	            line=dict(color='blue', width=2)
	        ))

	        # Predictions
	        fig.add_trace(go.Scatter(
	            x=pred_df['Date'],
	            y=pred_df['Predicted_Views'],
	            mode='lines',
	            name='Predictions',
	            line=dict(color='red', width=2, dash='dash')
	        ))

	        fig.update_layout(
	            title='Website Traffic Forecast',
	            xaxis_title='Date',
	            yaxis_title='Views',
	            height=500
	        )

	        st.plotly_chart(fig, width='stretch', config={'displayModeBar': True, 'displaylogo': False})

	        # Prediction summary
	        st.subheader("Prediction Summary")
	        col1, col2, col3 = st.columns(3)

	        with col1:
	            st.metric("Average Predicted Views", f"{predictions.mean():.0f}")
	        with col2:
	            st.metric("Max Predicted Views", f"{predictions.max():.0f}")
	        with col3:
	            st.metric("Min Predicted Views", f"{predictions.min():.0f}")

	        # Prediction table: dates as text and views as whole numbers. This is
	        # done after plotting so the chart keeps real datetimes on its x axis.
	        st.subheader("Detailed Predictions")
	        pred_df['Date'] = pred_df['Date'].dt.strftime('%Y-%m-%d')
	        pred_df['Predicted_Views'] = pred_df['Predicted_Views'].round(0).astype(int)
	        st.dataframe(pred_df, width='stretch')

	    with tab4:
	        st.header("Model Performance")

	        # Model summary
	        st.subheader("SARIMA Model Summary")
	        st.text(str(model.summary()))

	        # Model parameters
	        st.subheader("Model Parameters")
	        col1, col2 = st.columns(2)

	        with col1:
	            # NOTE(review): these orders are hard-coded text, not read from the
	            # loaded model -- confirm they match the saved model file.
	            st.write("Order (p,d,q): (5,1,2)")
	            st.write("Seasonal Order (P,D,Q,s): (5,1,2,12)")

	        with col2:
	            st.write("AIC: {:.2f}".format(model.aic))
	            st.write("BIC: {:.2f}".format(model.bic))

	        # Residuals analysis
	        st.subheader("Residuals Analysis")
	        residuals = model.resid

	        col1, col2 = st.columns(2)

	        with col1:
	            fig, ax = plt.subplots(figsize=(10, 6))
	            ax.hist(residuals, bins=30, alpha=0.7, edgecolor='black')
	            ax.set_title('Residuals Distribution')
	            ax.set_xlabel('Residuals')
	            ax.set_ylabel('Frequency')
	            st.pyplot(fig)
	            plt.close(fig)  # release the figure once Streamlit has rendered it

	        with col2:
	            fig, ax = plt.subplots(figsize=(10, 6))
	            # x axis is the residual's position in the series, not a date
	            ax.scatter(range(len(residuals)), residuals, alpha=0.6)
	            ax.set_title('Residuals vs Time')
	            ax.set_xlabel('Time')
	            ax.set_ylabel('Residuals')
	            ax.axhline(y=0, color='red', linestyle='--')
	            st.pyplot(fig)
	            plt.close(fig)  # release the figure once Streamlit has rendered it

	else:
	    st.error("Please ensure both the data file and model file are available to run the application.")