# SimpleTS / src/streamlit_app.py
# Uploaded by: enacimie
# Last commit: "Update src/streamlit_app.py"
# Commit f5aed0c (verified)
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import io
import warnings
warnings.filterwarnings("ignore")
# Project metadata, rendered in the header line and in the footer.
AUTHOR = "Eduardo Nacimiento García"
EMAIL = "enacimie@ull.edu.es"
LICENSE = "Apache 2.0"

# Page-level Streamlit settings, kept together so they can be read at a glance.
page_settings = {
    "page_title": "SimpleTS",
    "page_icon": "📈",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Header: app title, attribution line, and a one-sentence usage hint.
st.title("📈 SimpleTS")
attribution_line = f"**Author:** {AUTHOR} | **Email:** {EMAIL} | **License:** {LICENSE}"
st.markdown(attribution_line)
st.write("""
Upload a time series CSV or use the demo dataset to visualize, analyze, and forecast your data.
""")
# === GENERATE DEMO TIME SERIES ===
@st.cache_data
def create_demo_ts(freq='D', periods=365):
    """Build a reproducible synthetic series: linear trend + yearly cycle + noise.

    Args:
        freq: pandas frequency alias for the date index. 'D', 'W' and 'M'
            (plus the month aliases 'MS'/'ME') get a sinusoidal component
            that repeats once per year; any other alias gets trend and noise
            only instead of failing.
        periods: number of observations to generate.

    Returns:
        DataFrame with a 'Date' column starting at 2023-01-01 and a 'Value'
        column holding trend + seasonality + noise.
    """
    # (samples per yearly cycle, amplitude) for each supported frequency.
    # Weekly data needs ~52.18 samples per year; reusing the daily 365.25
    # would stretch a single cycle over roughly seven years of weekly points.
    seasonal_profiles = {
        'D': (365.25, 20),
        'W': (365.25 / 7, 20),
        'M': (12, 25),
        'MS': (12, 25),
        'ME': (12, 25),
    }

    date_rng = pd.date_range(start='2023-01-01', periods=periods, freq=freq)
    steps = np.arange(periods)
    trend = np.linspace(100, 200, periods)

    profile = seasonal_profiles.get(freq)
    if profile is None:
        # Unknown frequency: no seasonal component rather than an unbound name.
        seasonality = np.zeros(periods)
    else:
        cycle_length, amplitude = profile
        seasonality = amplitude * np.sin(2 * np.pi * steps / cycle_length)

    # A private generator seeded with 42 yields the same draws as
    # np.random.seed(42) would, without reseeding the process-wide RNG.
    noise = np.random.RandomState(42).normal(0, 5, periods)

    return pd.DataFrame({
        'Date': date_rng,
        'Value': trend + seasonality + noise,
    })
# === LOAD DATA ===
# Seed the session on the first run so later reads never hit a missing key.
if "demo_loaded" not in st.session_state:
    st.session_state.demo_loaded = False
    st.session_state.freq = 'D'

# One entry per demo button: (button label, frequency alias, points, display name).
demo_specs = [
    ("🧪 Load Daily Demo", 'D', 365, "Daily"),
    ("🧪 Load Monthly Demo", 'M', 48, "Monthly"),
    ("🧪 Load Weekly Demo", 'W', 104, "Weekly"),
]
for demo_column, (label, demo_freq, demo_periods, demo_name) in zip(st.columns(3), demo_specs):
    with demo_column:
        if st.button(label):
            st.session_state.demo_loaded = True
            st.session_state.freq = demo_freq
            st.session_state.df = create_demo_ts(demo_freq, demo_periods)
            st.success(f"✅ {demo_name} demo loaded!")

uploaded_file = st.file_uploader("📂 Upload your time series CSV (must have a date and a value column)", type=["csv"])

# Parse an upload only when it is new. Re-reading it on every rerun would
# overwrite a demo dataset chosen after the upload, because the file stays in
# the uploader widget until the user removes it.
if uploaded_file is None:
    # Forget the last upload so re-adding the same file counts as new.
    st.session_state.upload_key = None
else:
    upload_key = (uploaded_file.name, getattr(uploaded_file, "size", None))
    if st.session_state.get("upload_key") != upload_key:
        try:
            uploaded_df = pd.read_csv(uploaded_file)
        except Exception as e:
            # Malformed or empty files are reported instead of crashing the app.
            st.error(f"❌ Could not read the uploaded CSV: {e}")
            st.stop()
        if uploaded_df.empty:
            st.error("❌ The uploaded CSV contains no data rows.")
            st.stop()
        st.session_state.df = uploaded_df
        st.session_state.demo_loaded = False
        st.session_state.upload_key = upload_key
    if st.session_state.demo_loaded:
        st.caption("ℹ️ A demo dataset is active. Remove and re-upload your file to switch back to it.")
    else:
        st.success("✅ File uploaded successfully.")

# Use demo or uploaded file
if "df" in st.session_state:
    # Work on a copy so later column conversions never alter the stored frame.
    df = st.session_state.df.copy()
    freq = st.session_state.freq
    if st.session_state.demo_loaded:
        st.info(f"Using **{freq}** frequency demo dataset.")
else:
    df = None
    st.info("👆 Upload a CSV or load a demo dataset to begin.")
    st.stop()

# Show data preview
with st.expander("🔍 Data Preview (first 10 rows)"):
    st.dataframe(df.head(10))
# === SELECT DATE AND VALUE COLUMNS ===
st.subheader("📅 Configure Time Series")

# With a single column there is nothing left to pick as the value column.
if len(df.columns) < 2:
    st.error("❌ The dataset needs at least two columns: one with dates and one with values.")
    st.stop()

date_col = st.selectbox("Select date column:", df.columns)
value_col = st.selectbox("Select value column:", [col for col in df.columns if col != date_col])

# Convert to datetime and set index
try:
    # Copy first: `df` may be the very object kept in session state, and an
    # in-place conversion would persist there (e.g. after picking the wrong
    # column as the date column).
    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.set_index(date_col).sort_index()
    ts = df[value_col]
except Exception as e:
    st.error(f"❌ Error processing date column: {e}")
    st.stop()

# Every later step (tests, decomposition, models) does arithmetic on the values.
if not pd.api.types.is_numeric_dtype(ts):
    st.error(f"❌ Column '{value_col}' is not numeric; choose a numeric value column.")
    st.stop()

st.success("✅ Time series configured successfully.")
# Line chart of the raw series, with the y-axis labelled by the chosen value column.
st.subheader("📊 Original Time Series")
axis_labels = {'x': 'Date', 'y': value_col}
fig = px.line(
    x=ts.index,
    y=ts.values,
    labels=axis_labels,
    title="Original Time Series",
)
st.plotly_chart(fig, use_container_width=True)
# === TIME SERIES ANALYSIS ===
st.header("🔬 Time Series Analysis")

# Stationarity test (ADF)
st.subheader("📉 Stationarity Test (ADF)")
try:
    adf_result = adfuller(ts.dropna())
except Exception as e:
    # Report the failure like the other analysis steps do, and keep the rest
    # of the page usable instead of aborting the whole script.
    adf_result = None
    st.error(f"Could not run ADF test: {e}")

if adf_result is not None:
    # The first two entries are the test statistic and its p-value.
    adf_statistic, adf_pvalue = adf_result[0], adf_result[1]
    st.write(f"- **ADF Statistic:** {adf_statistic:.4f}")
    st.write(f"- **p-value:** {adf_pvalue:.4f}")
    if adf_pvalue < 0.05:
        st.success("🟢 Series is stationary (p < 0.05)")
    else:
        st.warning("🟠 Series is non-stationary (p >= 0.05) — consider differencing")
# Seasonal Decomposition
st.subheader("🎯 Seasonal Decomposition")

# Typical number of observations per seasonal cycle for each frequency code.
period_options = {
    'D': 365,
    'W': 52,
    'M': 12,
    'Q': 4,
    'Y': 1
}

# `freq` is reused further down (Prophet settings, future dates), so it is
# always reduced to one of 'D', 'W' or 'M'.
if st.session_state.demo_loaded:
    freq = st.session_state.freq
else:
    # Uploaded data: read the spacing from the index instead of assuming daily.
    try:
        inferred_alias = pd.infer_freq(ts.index)
    except (TypeError, ValueError):
        inferred_alias = None
    # Drop any anchor suffix ('W-SUN' -> 'W'); anything unrecognised stays daily.
    alias_root = (inferred_alias or 'D').split('-')[0]
    freq = {'W': 'W', 'M': 'M', 'MS': 'M', 'ME': 'M'}.get(alias_root, 'D')

# seasonal_decompose needs at least two complete cycles, so the suggested
# period is capped at half the usable observations (365 daily points cannot
# support a 365-step cycle). The floor of 2 matches the widget's min_value.
observations = len(ts.dropna())
largest_valid_period = max(2, observations // 2)
default_period = max(2, min(period_options.get(freq, 12), largest_valid_period))

period = st.number_input("Seasonal period (e.g., 12 for monthly, 365 for daily):",
                         min_value=2, value=default_period, step=1)

try:
    decomposition = seasonal_decompose(ts.dropna(), model='additive', period=int(period), extrapolate_trend='freq')

    # Plot decomposition: one trace per component on a shared time axis.
    fig = go.Figure()
    components = [
        ('Observed', decomposition.observed),
        ('Trend', decomposition.trend),
        ('Seasonal', decomposition.seasonal),
        ('Residual', decomposition.resid),
    ]
    for component_name, component in components:
        fig.add_trace(go.Scatter(x=component.index, y=component, mode='lines', name=component_name))
    fig.update_layout(title="Seasonal Decomposition", height=600)
    st.plotly_chart(fig, use_container_width=True)
except Exception as e:
    st.error(f"Could not decompose series: {e}")
# ACF / PACF plots
st.subheader("🔗 Autocorrelation (ACF) & Partial Autocorrelation (PACF)")

# Calculate safe max lags (must be < 50% of sample size for PACF)
clean_ts = ts.dropna()
n = len(clean_ts)
safe_max_lag = max(1, int(n * 0.49))  # Must be strictly less than 50%

# Adjust slider dynamically
max_lags_default = min(40, safe_max_lag)
max_lags = st.slider(
    "Max lags:",
    min_value=1,
    max_value=safe_max_lag,
    value=max_lags_default,
    step=1,
    help=f"Max allowed lags: {safe_max_lag} (based on sample size: {n})"
)

# Both plots follow the same recipe; only the title and plotting function differ.
correlation_plots = [("ACF", plot_acf), ("PACF", plot_pacf)]
for plot_column, (plot_title, plot_function) in zip(st.columns(2), correlation_plots):
    with plot_column:
        st.write(f"**{plot_title} Plot**")
        fig_corr, ax_corr = plt.subplots(figsize=(6, 4))
        try:
            plot_function(clean_ts, lags=max_lags, ax=ax_corr)
            st.pyplot(fig_corr)
        except Exception as e:
            st.error(f"Could not generate {plot_title} plot: {e}")
            st.write("Try reducing the number of lags.")
        finally:
            # The script reruns on every interaction; without closing, pyplot
            # keeps each figure alive and they pile up in memory.
            plt.close(fig_corr)
# === FORECASTING MODELS ===
st.header("🤖 Forecasting Models")

# Train/test split on the observed values only: the analysis steps above all
# work on ts.dropna(), and a NaN in either window breaks the Holt-Winters fit
# and the error metrics.
model_series = ts.dropna()
test_size = st.slider("Test set size (as % of data):", min_value=5, max_value=40, value=20, step=5)
split_point = int(len(model_series) * (1 - test_size/100))
train, test = model_series[:split_point], model_series[split_point:]
st.write(f"Training on {len(train)} points, testing on {len(test)} points.")

model_choice = st.selectbox("Choose forecasting model:",
                            ["Holt-Winters Exponential Smoothing", "ARIMA", "Prophet"])

# Initialize forecast variable; both stay None when the chosen model fails to fit.
forecast = None
model = None

if model_choice == "Holt-Winters Exponential Smoothing":
    # Start from the decomposition period, capped so the training window
    # still holds two full seasonal cycles.
    hw_default_period = max(2, min(int(period), len(train) // 2))
    seasonal_periods = st.number_input("Seasonal periods:", min_value=2, value=hw_default_period, step=1)
    try:
        hw_model = ExponentialSmoothing(
            train,
            trend='add',
            seasonal='add',
            seasonal_periods=int(seasonal_periods)
        ).fit()
        forecast = hw_model.forecast(len(test))
        model = hw_model
    except Exception as e:
        st.error(f"Could not fit Holt-Winters model: {e}")

elif model_choice == "ARIMA":
    col1, col2, col3 = st.columns(3)
    p = col1.number_input("AR order (p):", min_value=0, max_value=5, value=1)
    d = col2.number_input("Differencing order (d):", min_value=0, max_value=2, value=1)
    q = col3.number_input("MA order (q):", min_value=0, max_value=5, value=1)
    try:
        arima_model = ARIMA(train, order=(p, d, q)).fit()
        forecast = arima_model.forecast(len(test))
        model = arima_model
    except Exception as e:
        st.error(f"Could not fit ARIMA model: {e}")

elif model_choice == "Prophet":
    # Prepare data for Prophet, which expects 'ds' (dates) and 'y' (values) columns.
    prophet_df = pd.DataFrame({
        'ds': train.index,
        'y': train.values
    })
    try:
        # NOTE(review): for monthly data yearly seasonality is switched off and
        # a 30.5-day cycle is added instead — confirm this is the intended setup.
        prophet_model = Prophet(
            yearly_seasonality=freq in ['D', 'W'],
            weekly_seasonality=freq == 'D',
            daily_seasonality=False
        )
        if freq == 'M':
            prophet_model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
        prophet_model.fit(prophet_df)

        # Forecast exactly the dates of the hold-out window.
        future = pd.DataFrame({'ds': test.index})
        forecast_df = prophet_model.predict(future)
        forecast = forecast_df['yhat'].values
        model = prophet_model
    except Exception as e:
        st.error(f"Could not fit Prophet model: {e}")
# Show results if forecast exists
if forecast is not None:
    # Hold-out accuracy of the fitted model on the test window.
    mae = mean_absolute_error(test, forecast)
    mse = mean_squared_error(test, forecast)
    rmse = np.sqrt(mse)

    st.subheader("📈 Forecast Results")
    col1, col2, col3 = st.columns(3)
    col1.metric("MAE", f"{mae:.2f}")
    col2.metric("MSE", f"{mse:.2f}")
    col3.metric("RMSE", f"{rmse:.2f}")

    # Plot forecast vs actual
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train.index, y=train, mode='lines', name='Training', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=test.index, y=test, mode='lines', name='Actual', line=dict(color='green')))
    fig.add_trace(go.Scatter(x=test.index, y=forecast, mode='lines+markers', name='Forecast', line=dict(color='red', dash='dash')))
    fig.update_layout(
        title=f"{model_choice} Forecast",
        xaxis_title="Date",
        yaxis_title=value_col,
        legend=dict(x=0, y=1)
    )
    st.plotly_chart(fig, use_container_width=True)

    # Allow forecasting into future
    st.subheader("🔮 Forecast Future Periods")
    future_periods = int(st.number_input("Number of future periods to forecast:", min_value=1, max_value=365, value=30, step=1))

    if st.button("🚀 Generate Future Forecast"):
        try:
            # Future timestamps are built once for all models. Each entry is
            # (gap from the last observation to the first future date, spacing
            # of the following dates). Offset objects are used so the code does
            # not depend on the string alias 'M', which newer pandas deprecates;
            # Week(weekday=6) and MonthEnd() are what 'W' and 'M' stand for.
            future_calendar = {
                'D': (pd.Timedelta(days=1), pd.offsets.Day()),
                'W': (pd.Timedelta(weeks=1), pd.offsets.Week(weekday=6)),
                'M': (pd.DateOffset(months=1), pd.offsets.MonthEnd()),
            }
            first_gap, date_spacing = future_calendar.get(freq, future_calendar['D'])
            last_date = ts.index[-1]
            future_dates = pd.date_range(start=last_date + first_gap, periods=future_periods, freq=date_spacing)

            if model_choice == "Prophet":
                # Prophet predicts for explicit dates, so it only needs the calendar.
                future_df = pd.DataFrame({'ds': future_dates})
                future_forecast = model.predict(future_df)['yhat'].values
            else:
                # Holt-Winters and ARIMA were fitted on the training window and
                # forecast from its end. Step past the test window first, so the
                # values kept really belong to the dates after the last observation.
                total_steps = len(test) + future_periods
                future_forecast = np.asarray(model.forecast(total_steps))[-future_periods:]

            # Plot future forecast
            fig_future = go.Figure()
            fig_future.add_trace(go.Scatter(x=ts.index, y=ts.values, mode='lines', name='Historical', line=dict(color='blue')))
            fig_future.add_trace(go.Scatter(x=future_dates, y=future_forecast, mode='lines+markers', name='Future Forecast', line=dict(color='red', dash='dash')))
            fig_future.update_layout(
                title="Future Forecast",
                xaxis_title="Date",
                yaxis_title=value_col
            )
            st.plotly_chart(fig_future, use_container_width=True)

            # Show as table
            forecast_table = pd.DataFrame({
                'Date': future_dates,
                'Forecast': future_forecast
            })
            with st.expander("📋 View Forecast Table"):
                st.dataframe(forecast_table)
        except Exception as e:
            st.error(f"Could not generate future forecast: {e}")
# Footer: a horizontal rule followed by the attribution caption.
footer_caption = f"© {AUTHOR} | License {LICENSE} | Contact: {EMAIL}"
st.markdown("---")
st.caption(footer_caption)