|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import plotly.express as px |
|
|
import plotly.graph_objects as go |
|
|
from statsmodels.tsa.seasonal import seasonal_decompose |
|
|
from statsmodels.tsa.stattools import adfuller, acf, pacf |
|
|
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf |
|
|
from statsmodels.tsa.holtwinters import ExponentialSmoothing |
|
|
from statsmodels.tsa.arima.model import ARIMA |
|
|
from prophet import Prophet |
|
|
from sklearn.metrics import mean_absolute_error, mean_squared_error |
|
|
import matplotlib.pyplot as plt |
|
|
import io |
|
|
import warnings |
|
|
warnings.filterwarnings("ignore") |
|
|
|
|
|
|
|
|
# Project metadata, reused by the page header and the footer caption.
AUTHOR = "Eduardo Nacimiento García"
EMAIL = "enacimie@ull.edu.es"
LICENSE = "Apache 2.0"

# Page-level Streamlit configuration, gathered in one mapping.
page_settings = {
    "page_title": "SimpleTS",
    "page_icon": "📈",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Header: title, byline built from the metadata above, and a short usage blurb.
st.title("📈 SimpleTS")
byline = f"**Author:** {AUTHOR} | **Email:** {EMAIL} | **License:** {LICENSE}"
st.markdown(byline)
st.write("""
Upload a time series CSV or use the demo dataset to visualize, analyze, and forecast your data.
""")
|
|
|
|
|
|
|
|
@st.cache_data
def create_demo_ts(freq='D', periods=365):
    """Build a synthetic demo series: linear trend + yearly sine wave + noise.

    Args:
        freq: Sampling frequency code: 'D' (daily), 'W' (weekly) or
            'M' (month-end).
        periods: Number of observations to generate, starting 2023-01-01.

    Returns:
        A DataFrame with a 'Date' column (timestamps) and a 'Value' column
        (floats).

    Raises:
        ValueError: If ``freq`` is not one of the supported codes.
    """
    # (observations per yearly cycle, seasonal amplitude) for each frequency.
    # The cycle length is expressed in steps of the chosen frequency so that
    # every variant has a one-year seasonal pattern.
    seasonal_profiles = {
        'D': (365.25, 20),
        'W': (365.25 / 7, 20),
        'M': (12, 25),
    }
    if freq not in seasonal_profiles:
        supported = ", ".join(sorted(seasonal_profiles))
        raise ValueError(f"Unsupported demo frequency {freq!r}; expected one of: {supported}")
    steps_per_cycle, amplitude = seasonal_profiles[freq]

    # An offset object keeps month-end generation working without relying on
    # the 'M' string alias, which recent pandas versions deprecate.
    date_freq = pd.offsets.MonthEnd() if freq == 'M' else freq
    date_rng = pd.date_range(start='2023-01-01', periods=periods, freq=date_freq)

    trend = np.linspace(100, 200, periods)
    seasonality = amplitude * np.sin(2 * np.pi * np.arange(periods) / steps_per_cycle)

    # A private, fixed-seed generator keeps the demo reproducible without
    # reseeding NumPy's process-wide random state.
    rng = np.random.RandomState(42)
    noise = rng.normal(0, 5, periods)

    return pd.DataFrame({
        'Date': date_rng,
        'Value': trend + seasonality + noise,
    })
|
|
|
|
|
|
|
|
# First run of a session: no demo chosen yet, daily frequency as the default.
if "demo_loaded" not in st.session_state:
    st.session_state.demo_loaded = False
    st.session_state.freq = 'D'

# One row per demo button: (button label, frequency code, points, confirmation).
demo_specs = (
    ("🧪 Load Daily Demo", 'D', 365, "✅ Daily demo loaded!"),
    ("🧪 Load Monthly Demo", 'M', 48, "✅ Monthly demo loaded!"),
    ("🧪 Load Weekly Demo", 'W', 104, "✅ Weekly demo loaded!"),
)

col1, col2, col3 = st.columns(3)
for column, (label, demo_freq, n_points, message) in zip((col1, col2, col3), demo_specs):
    with column:
        if st.button(label):
            # Remember the choice in session state so it is still available on reruns.
            st.session_state.demo_loaded = True
            st.session_state.freq = demo_freq
            st.session_state.df = create_demo_ts(demo_freq, n_points)
            st.success(message)
|
|
|
|
|
uploaded_file = st.file_uploader("📂 Upload your time series CSV (must have a date and a value column)", type=["csv"])

# Pick the active dataset: an uploaded file takes precedence over a dataset
# already stored in session state; with neither, the script stops here.
if uploaded_file:
    try:
        # Defensive rewind in case the buffer was already consumed earlier.
        uploaded_file.seek(0)
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
        # A malformed or empty file is reported in the UI instead of crashing.
        st.error(f"❌ Could not read the uploaded CSV: {e}")
        st.stop()
    st.session_state.df = df
    st.session_state.demo_loaded = False
    st.success("✅ File uploaded successfully.")
elif "df" in st.session_state:
    df = st.session_state.df
    freq = st.session_state.freq
    if st.session_state.demo_loaded:
        st.info(f"Using **{freq}** frequency demo dataset.")
else:
    st.info("👆 Upload a CSV or load a demo dataset to begin.")
    st.stop()
|
|
|
|
|
|
|
|
with st.expander("🔍 Data Preview (first 10 rows)"):
    st.dataframe(df.head(10))

st.subheader("📅 Configure Time Series")

date_col = st.selectbox("Select date column:", df.columns)
value_col = st.selectbox("Select value column:", [col for col in df.columns if col != date_col])

# With fewer than two columns there is nothing left to use as the value column.
if value_col is None:
    st.error("❌ The dataset needs at least two columns: one with dates and one with values.")
    st.stop()

try:
    # Work on a copy: `df` may be the very object stored in session state, and
    # converting a column in place would permanently alter the stored dataset
    # (e.g. after picking the wrong column as the date column).
    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.set_index(date_col).sort_index()
    ts = df[value_col]
    st.success("✅ Time series configured successfully.")
except Exception as e:
    st.error(f"❌ Error processing date column: {e}")
    st.stop()
|
|
|
|
|
|
|
|
st.subheader("📊 Original Time Series")
fig = px.line(x=ts.index, y=ts.values, labels={'x': 'Date', 'y': value_col}, title="Original Time Series")
st.plotly_chart(fig, use_container_width=True)

st.header("🔬 Time Series Analysis")

# Augmented Dickey-Fuller test on the series with missing values removed.
st.subheader("📉 Stationarity Test (ADF)")
try:
    adf_result = adfuller(ts.dropna())
except Exception as e:
    # Report the failure in the UI, as the other analysis sections do, so the
    # remaining sections can still render.
    st.error(f"Could not run ADF test: {e}")
else:
    st.write(f"- **ADF Statistic:** {adf_result[0]:.4f}")
    st.write(f"- **p-value:** {adf_result[1]:.4f}")
    # Conventional 5% significance threshold on the test's p-value.
    if adf_result[1] < 0.05:
        st.success("🟢 Series is stationary (p < 0.05)")
    else:
        st.warning("🟠 Series is non-stationary (p >= 0.05) — consider differencing")
|
|
|
|
|
|
|
|
st.subheader("🎯 Seasonal Decomposition")

# Typical number of observations per seasonal cycle for each frequency code.
period_options = {
    'D': 365,
    'W': 52,
    'M': 12,
    'Q': 4,
    'Y': 1
}
# Uploaded files carry no frequency information here, so daily is assumed.
freq = st.session_state.freq if st.session_state.demo_loaded else 'D'

decomposition_input = ts.dropna()

# The decomposition needs at least two complete cycles, so the default period
# is capped at half the series length; it is also kept at or above the
# widget's minimum of 2. The user can still override the value.
longest_usable_period = max(2, len(decomposition_input) // 2)
default_period = min(max(2, period_options.get(freq, 12)), longest_usable_period)

period = st.number_input("Seasonal period (e.g., 12 for monthly, 365 for daily):",
                         min_value=2, value=default_period, step=1)

try:
    decomposition = seasonal_decompose(decomposition_input, model='additive', period=int(period), extrapolate_trend='freq')

    # Overlay the four components as separate traces on a single figure.
    components = (
        ('Observed', decomposition.observed),
        ('Trend', decomposition.trend),
        ('Seasonal', decomposition.seasonal),
        ('Residual', decomposition.resid),
    )
    fig = go.Figure()
    for label, component in components:
        fig.add_trace(go.Scatter(x=component.index, y=component, mode='lines', name=label))
    fig.update_layout(title="Seasonal Decomposition", height=600)
    st.plotly_chart(fig, use_container_width=True)
except Exception as e:
    st.error(f"Could not decompose series: {e}")
|
|
|
|
|
|
|
|
st.subheader("🔗 Autocorrelation (ACF) & Partial Autocorrelation (PACF)")

correlation_input = ts.dropna()
n = len(correlation_input)

# Keep the selectable lag count just under half the sample size.
# NOTE(review): this presumably mirrors the PACF lag limit in statsmodels — confirm.
safe_max_lag = max(1, int(n * 0.49))
max_lags_default = min(40, safe_max_lag)
max_lags = st.slider(
    "Max lags:",
    min_value=1,
    max_value=safe_max_lag,
    value=max_lags_default,
    step=1,
    help=f"Max allowed lags: {safe_max_lag} (based on sample size: {n})"
)

col1, col2 = st.columns(2)

with col1:
    st.write("**ACF Plot**")
    fig_acf, ax_acf = plt.subplots(figsize=(6, 4))
    try:
        plot_acf(correlation_input, lags=max_lags, ax=ax_acf)
        st.pyplot(fig_acf)
    except Exception as e:
        st.error(f"Could not generate ACF plot: {e}")
        st.write("Try reducing the number of lags.")
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # without this, figures pile up each time the script reruns.
        plt.close(fig_acf)

with col2:
    st.write("**PACF Plot**")
    fig_pacf, ax_pacf = plt.subplots(figsize=(6, 4))
    try:
        plot_pacf(correlation_input, lags=max_lags, ax=ax_pacf)
        st.pyplot(fig_pacf)
    except Exception as e:
        st.error(f"Could not generate PACF plot: {e}")
        st.write("Try reducing the number of lags.")
    finally:
        plt.close(fig_pacf)
|
|
|
|
|
|
|
|
st.header("🤖 Forecasting Models")

# Chronological hold-out: the last `test_size` percent of points form the test set.
test_size = st.slider("Test set size (as % of data):", min_value=5, max_value=40, value=20, step=5)
split_point = int(len(ts) * (1 - test_size/100))
train = ts[:split_point]
test = ts[split_point:]

st.write(f"Training on {len(train)} points, testing on {len(test)} points.")

available_models = ["Holt-Winters Exponential Smoothing", "ARIMA", "Prophet"]
model_choice = st.selectbox("Choose forecasting model:", available_models)

# Both stay None unless the selected model is fitted successfully further down.
forecast = model = None
|
|
|
|
|
# Fit the selected model on the training split and forecast the test horizon.
# On failure the error is shown and `forecast` / `model` keep their prior values.
if model_choice == "Holt-Winters Exponential Smoothing":
    seasonal_periods = st.number_input("Seasonal periods:", min_value=2, value=period, step=1)
    try:
        hw_model = ExponentialSmoothing(
            train,
            trend='add',
            seasonal='add',
            seasonal_periods=int(seasonal_periods)
        ).fit()
        forecast = hw_model.forecast(len(test))
        model = hw_model
    except Exception as e:
        st.error(f"Could not fit Holt-Winters model: {e}")

elif model_choice == "ARIMA":
    col1, col2, col3 = st.columns(3)
    p = col1.number_input("AR order (p):", min_value=0, max_value=5, value=1)
    d = col2.number_input("Differencing order (d):", min_value=0, max_value=2, value=1)
    q = col3.number_input("MA order (q):", min_value=0, max_value=5, value=1)
    try:
        arima_model = ARIMA(train, order=(p, d, q)).fit()
        forecast = arima_model.forecast(len(test))
        model = arima_model
    except Exception as e:
        st.error(f"Could not fit ARIMA model: {e}")

elif model_choice == "Prophet":
    # Prophet expects a frame with a 'ds' (timestamp) and a 'y' (value) column.
    prophet_df = pd.DataFrame({
        'ds': train.index,
        'y': train.values
    })
    try:
        prophet_model = Prophet(
            # Yearly seasonality is the one pattern that daily, weekly and
            # monthly samples can all resolve, so it is enabled for each.
            yearly_seasonality=freq in ('D', 'W', 'M'),
            # A within-week pattern needs several observations per week.
            weekly_seasonality=(freq == 'D'),
            daily_seasonality=False
        )
        # No ~30-day seasonality is added for monthly data: with a single
        # observation per month such a cycle cannot be identified.
        prophet_model.fit(prophet_df)

        # Predict exactly at the test timestamps so forecasts align with actuals.
        future = pd.DataFrame({'ds': test.index})
        forecast_df = prophet_model.predict(future)
        forecast = forecast_df['yhat'].values
        model = prophet_model
    except Exception as e:
        st.error(f"Could not fit Prophet model: {e}")
|
|
|
|
|
|
|
|
def _future_index(last_date, periods, freq):
    """Return `periods` timestamps following `last_date` for frequency code `freq`.

    'W' yields weekly dates and 'M' month-end dates; any other code falls
    back to daily steps.
    """
    if freq == 'W':
        return pd.date_range(start=last_date + pd.Timedelta(weeks=1), periods=periods, freq='W')
    if freq == 'M':
        # Offset object instead of the 'M' string alias, which recent pandas
        # versions deprecate.
        return pd.date_range(start=last_date + pd.DateOffset(months=1), periods=periods,
                             freq=pd.offsets.MonthEnd())
    return pd.date_range(start=last_date + pd.Timedelta(days=1), periods=periods, freq='D')


if forecast is not None:
    # Hold-out accuracy of the fitted model on the test split.
    mae = mean_absolute_error(test, forecast)
    mse = mean_squared_error(test, forecast)
    rmse = np.sqrt(mse)

    st.subheader("📈 Forecast Results")
    col1, col2, col3 = st.columns(3)
    col1.metric("MAE", f"{mae:.2f}")
    col2.metric("MSE", f"{mse:.2f}")
    col3.metric("RMSE", f"{rmse:.2f}")

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train.index, y=train, mode='lines', name='Training', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=test.index, y=test, mode='lines', name='Actual', line=dict(color='green')))
    fig.add_trace(go.Scatter(x=test.index, y=forecast, mode='lines+markers', name='Forecast', line=dict(color='red', dash='dash')))
    fig.update_layout(
        title=f"{model_choice} Forecast",
        xaxis_title="Date",
        yaxis_title=value_col,
        legend=dict(x=0, y=1)
    )
    st.plotly_chart(fig, use_container_width=True)

    st.subheader("🔮 Forecast Future Periods")
    future_periods = st.number_input("Number of future periods to forecast:", min_value=1, max_value=365, value=30, step=1)

    if st.button("🚀 Generate Future Forecast"):
        try:
            horizon = int(future_periods)
            future_dates = _future_index(ts.index[-1], horizon, freq)

            if model_choice == "Prophet":
                # Prophet predicts for explicit timestamps, so the future
                # dates can be passed in directly.
                future_df = pd.DataFrame({'ds': future_dates})
                future_forecast = model.predict(future_df)['yhat'].values
            else:
                # Holt-Winters / ARIMA forecast step-by-step from the end of
                # the TRAINING data. The first len(test) steps therefore cover
                # the test window; only the steps after it belong to the
                # future dates plotted below.
                steps_ahead = len(test) + horizon
                future_forecast = np.asarray(model.forecast(steps_ahead))[-horizon:]

            fig_future = go.Figure()
            fig_future.add_trace(go.Scatter(x=ts.index, y=ts.values, mode='lines', name='Historical', line=dict(color='blue')))
            fig_future.add_trace(go.Scatter(x=future_dates, y=future_forecast, mode='lines+markers', name='Future Forecast', line=dict(color='red', dash='dash')))
            fig_future.update_layout(
                title="Future Forecast",
                xaxis_title="Date",
                yaxis_title=value_col
            )
            st.plotly_chart(fig_future, use_container_width=True)

            forecast_df = pd.DataFrame({
                'Date': future_dates,
                'Forecast': future_forecast
            })
            with st.expander("📋 View Forecast Table"):
                st.dataframe(forecast_df)

        except Exception as e:
            st.error(f"Could not generate future forecast: {e}")
|
|
|
|
|
|
|
|
# Page footer: horizontal rule followed by the attribution caption.
st.markdown("---")
footer_text = f"© {AUTHOR} | License {LICENSE} | Contact: {EMAIL}"
st.caption(footer_text)