"""SimpleTS: a Streamlit app to visualize, analyze, and forecast a time series."""

import io
import warnings
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, adfuller, pacf

warnings.filterwarnings("ignore")

# Metadata
AUTHOR = "Eduardo Nacimiento García"
EMAIL = "enacimie@ull.edu.es"
LICENSE = "Apache 2.0"

# Fewest usable observations: below this the lag slider would get
# min_value == max_value and the train/test split becomes degenerate.
MIN_OBSERVATIONS = 10

# Offset objects instead of alias strings: the 'M' alias is deprecated in
# recent pandas releases, while the offset classes work everywhere.
FREQ_OFFSETS = {
    'D': pd.offsets.Day(),
    'W': pd.offsets.Week(weekday=6),  # Sunday-anchored, same as the 'W' alias
    'M': pd.offsets.MonthEnd(),
}

# Base pandas frequency alias -> coarse frequency code used by this app.
FREQ_ALIASES = {
    'D': 'D', 'B': 'D',
    'W': 'W',
    'M': 'M', 'MS': 'M', 'ME': 'M', 'BM': 'M', 'BMS': 'M', 'BME': 'M',
    'Q': 'Q', 'QS': 'Q', 'QE': 'Q', 'BQ': 'Q', 'BQS': 'Q', 'BQE': 'Q',
    'A': 'Y', 'AS': 'Y', 'Y': 'Y', 'YS': 'Y', 'YE': 'Y',
    'BA': 'Y', 'BAS': 'Y', 'BY': 'Y', 'BYS': 'Y', 'BYE': 'Y',
}

# Candidate seasonal periods per frequency code, most natural first.
SEASONAL_PERIOD_CANDIDATES = {
    'D': (365, 7),
    'W': (52,),
    'M': (12,),
    'Q': (4,),
    'Y': (2,),
}

# Short model names used in error messages.
MODEL_LABELS = {
    "Holt-Winters Exponential Smoothing": "Holt-Winters",
    "ARIMA": "ARIMA",
    "Prophet": "Prophet",
}

# (button label, frequency code, number of points) for each demo dataset.
DEMOS = (
    ("Daily", 'D', 365),
    ("Monthly", 'M', 48),
    ("Weekly", 'W', 104),
)


# === HELPERS ===
def infer_alias(index):
    """Return the pandas frequency alias inferred from *index*, or None."""
    if len(index) < 3:
        return None  # pd.infer_freq needs at least three timestamps
    try:
        return pd.infer_freq(index)
    except (TypeError, ValueError):
        return None


def infer_freq_code(index):
    """Map the inferred frequency of *index* to 'D'/'W'/'M'/'Q'/'Y', or None."""
    alias = infer_alias(index)
    if not alias:
        return None
    # Drop an anchor suffix ("W-SUN" -> "W") and a multiplier ("2W" -> "W").
    base = alias.split('-')[0].lstrip('0123456789')
    return FREQ_ALIASES.get(base)


def default_seasonal_period(freq, n_obs):
    """Pick a default seasonal period that fits at least twice into *n_obs*.

    Seasonal decomposition needs two complete cycles, so the natural period
    for a frequency (e.g. 365 for daily data) is only usable when the series
    is long enough. The result is never below 2, the widget's minimum.
    """
    for candidate in SEASONAL_PERIOD_CANDIDATES.get(freq, (12,)):
        if 2 * candidate <= n_obs:
            return candidate
    return max(2, n_obs // 2)


def future_index(index, periods, freq):
    """Return *periods* timestamps that follow the last entry of *index*.

    The step is the frequency inferred from *index* when possible; otherwise
    it falls back to the offset for the coarse code *freq* (daily if unknown).
    """
    step = infer_alias(index) or FREQ_OFFSETS.get(freq, pd.offsets.Day())
    last = index[-1]
    # Generate one extra point: when `last` already lies on the frequency
    # grid it is returned as the first element and must be discarded.
    candidates = pd.date_range(start=last, periods=periods + 1, freq=step)
    return candidates[candidates > last][:periods]


def fit_holt_winters(series, seasonal_periods):
    """Fit an additive-trend, additive-seasonal Holt-Winters model."""
    return ExponentialSmoothing(
        series,
        trend='add',
        seasonal='add',
        seasonal_periods=seasonal_periods,
    ).fit()


def fit_arima(series, order):
    """Fit an ARIMA model with the given (p, d, q) order."""
    return ARIMA(series, order=order).fit()


def fit_prophet(series, freq):
    """Fit a Prophet model whose seasonalities depend on the frequency code."""
    prophet_model = Prophet(
        yearly_seasonality=freq in ('D', 'W'),
        weekly_seasonality=freq == 'D',
        daily_seasonality=False,
    )
    if freq == 'M':
        prophet_model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    prophet_model.fit(pd.DataFrame({'ds': series.index, 'y': series.values}))
    return prophet_model


def forecast_for(fitted, index):
    """Return forecast values (NumPy array) for the timestamps in *index*.

    Prophet predicts for explicit dates; the statsmodels results forecast
    ``len(index)`` steps past the end of the data they were fitted on.
    """
    if isinstance(fitted, Prophet):
        return fitted.predict(pd.DataFrame({'ds': index}))['yhat'].to_numpy()
    return np.asarray(fitted.forecast(len(index)))


# Page config
st.set_page_config(
    page_title="SimpleTS",
    page_icon="📈",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Title
st.title("📈 SimpleTS")
st.markdown(f"**Author:** {AUTHOR} | **Email:** {EMAIL} | **License:** {LICENSE}")
st.write("""
Upload a time series CSV or use the demo dataset to visualize, analyze, and forecast your data.
""")


# === GENERATE DEMO TIME SERIES ===
@st.cache_data
def create_demo_ts(freq='D', periods=365):
    """Build a synthetic series made of linear trend + yearly seasonality + noise.

    Args:
        freq: 'D', 'W' or 'M' for daily, weekly or month-end data. Any other
            value is handed to ``pd.date_range`` unchanged and yields a series
            without a seasonal component.
        periods: Number of observations to generate.

    Returns:
        DataFrame with a 'Date' column starting at 2023-01-01 and a 'Value'
        column. The noise is seeded, so the output is reproducible.
    """
    date_rng = pd.date_range(
        start='2023-01-01',
        periods=periods,
        freq=FREQ_OFFSETS.get(freq, freq),
    )

    # (amplitude, samples per yearly cycle) for each supported frequency.
    seasonal_shapes = {
        'D': (20, 365.25),
        'W': (20, 365.25 / 7),
        'M': (25, 12),
    }
    steps = np.arange(periods)
    if freq in seasonal_shapes:
        amplitude, cycle = seasonal_shapes[freq]
        seasonality = amplitude * np.sin(2 * np.pi * steps / cycle)
    else:
        seasonality = np.zeros(periods)

    trend = np.linspace(100, 200, periods)
    # A private RandomState gives the same draws as np.random.seed(42)
    # followed by np.random.normal, without touching the global RNG.
    noise = np.random.RandomState(42).normal(0, 5, periods)

    return pd.DataFrame({
        'Date': date_rng,
        'Value': trend + seasonality + noise,
    })


# === LOAD DATA ===
if "demo_loaded" not in st.session_state:
    st.session_state.demo_loaded = False
    st.session_state.freq = 'D'

for column, (label, code, n_points) in zip(st.columns(3), DEMOS):
    with column:
        if st.button(f"🧪 Load {label} Demo"):
            st.session_state.demo_loaded = True
            st.session_state.freq = code
            st.session_state.df = create_demo_ts(code, n_points)
            st.success(f"✅ {label} demo loaded!")

uploaded_file = st.file_uploader(
    "📂 Upload your time series CSV (must have a date and a value column)",
    type=["csv"],
)

# Use demo or uploaded file
if uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"❌ Could not read CSV file: {e}")
        st.stop()
    st.session_state.df = df
    st.session_state.demo_loaded = False
    st.success("✅ File uploaded successfully.")
elif "df" in st.session_state:
    df = st.session_state.df
    if st.session_state.demo_loaded:
        st.info(f"Using **{st.session_state.freq}** frequency demo dataset.")
else:
    st.info("👆 Upload a CSV or load a demo dataset to begin.")
    st.stop()

if df.shape[1] < 2:
    st.error("❌ The dataset needs at least two columns: a date and a value.")
    st.stop()

# Show data preview
with st.expander("🔍 Data Preview (first 10 rows)"):
    st.dataframe(df.head(10))

# === SELECT DATE AND VALUE COLUMNS ===
st.subheader("📅 Configure Time Series")

date_col = st.selectbox("Select date column:", df.columns)
value_col = st.selectbox(
    "Select value column:",
    [col for col in df.columns if col != date_col],
)

# Convert to datetime and set index
try:
    # Work on a copy: `df` is the object kept in session state, and writing
    # the converted column into it would corrupt the data on later reruns.
    df = df.copy()
    df[date_col] = pd.to_datetime(df[date_col])
    df = df.set_index(date_col).sort_index()
    # Non-numeric entries become NaN and are dropped with the missing values.
    ts = pd.to_numeric(df[value_col], errors='coerce').dropna()
except Exception as e:
    st.error(f"❌ Error processing date column: {e}")
    st.stop()

if len(ts) < MIN_OBSERVATIONS:
    st.error(
        f"❌ Need at least {MIN_OBSERVATIONS} numeric observations, "
        f"found {len(ts)}."
    )
    st.stop()

st.success("✅ Time series configured successfully.")

# Prefer the frequency found in the data; fall back to the demo's declared
# frequency, then to daily.
freq = infer_freq_code(ts.index) or (
    st.session_state.freq if st.session_state.demo_loaded else 'D'
)

# Plot original series
st.subheader("📊 Original Time Series")
fig = px.line(
    x=ts.index,
    y=ts.values,
    labels={'x': 'Date', 'y': value_col},
    title="Original Time Series",
)
st.plotly_chart(fig, use_container_width=True)

# === TIME SERIES ANALYSIS ===
st.header("🔬 Time Series Analysis")

# Stationarity test (ADF)
st.subheader("📉 Stationarity Test (ADF)")
try:
    adf_result = adfuller(ts)
except ValueError as e:
    # Raised, for example, when the series is constant.
    st.error(f"Could not run ADF test: {e}")
else:
    st.write(f"- **ADF Statistic:** {adf_result[0]:.4f}")
    st.write(f"- **p-value:** {adf_result[1]:.4f}")
    if adf_result[1] < 0.05:
        st.success("🟢 Series is stationary (p < 0.05)")
    else:
        st.warning("🟠 Series is non-stationary (p >= 0.05) — consider differencing")

# Seasonal Decomposition
st.subheader("🎯 Seasonal Decomposition")

period = int(st.number_input(
    "Seasonal period (e.g., 12 for monthly, 365 for daily):",
    min_value=2,
    value=default_seasonal_period(freq, len(ts)),
    step=1,
))

try:
    decomposition = seasonal_decompose(
        ts,
        model='additive',
        period=period,
        extrapolate_trend='freq',
    )

    # Plot decomposition
    fig = go.Figure()
    components = (
        ('Observed', decomposition.observed),
        ('Trend', decomposition.trend),
        ('Seasonal', decomposition.seasonal),
        ('Residual', decomposition.resid),
    )
    for component_name, component in components:
        fig.add_trace(go.Scatter(
            x=component.index,
            y=component,
            mode='lines',
            name=component_name,
        ))
    fig.update_layout(title="Seasonal Decomposition", height=600)
    st.plotly_chart(fig, use_container_width=True)
except Exception as e:
    st.error(f"Could not decompose series: {e}")

# ACF / PACF Plots (fixed)
st.subheader("🔗 Autocorrelation (ACF) & Partial Autocorrelation (PACF)")

# Calculate safe max lags (must be < 50% of sample size for PACF)
n = len(ts)
safe_max_lag = max(1, int(n * 0.49))  # Must be strictly less than 50%

# Adjust slider dynamically
max_lags = st.slider(
    "Max lags:",
    min_value=1,
    max_value=safe_max_lag,
    value=min(40, safe_max_lag),
    step=1,
    help=f"Max allowed lags: {safe_max_lag} (based on sample size: {n})",
)

col1, col2 = st.columns(2)

with col1:
    st.write("**ACF Plot**")
    fig_acf, ax_acf = plt.subplots(figsize=(6, 4))
    try:
        plot_acf(ts, lags=max_lags, ax=ax_acf)
        st.pyplot(fig_acf)
    finally:
        # Streamlit reruns the script on every interaction; unclosed
        # figures would pile up in memory.
        plt.close(fig_acf)

with col2:
    st.write("**PACF Plot**")
    fig_pacf, ax_pacf = plt.subplots(figsize=(6, 4))
    try:
        plot_pacf(ts, lags=max_lags, ax=ax_pacf)
        st.pyplot(fig_pacf)
    except Exception as e:
        st.error(f"Could not generate PACF plot: {e}")
        st.write("Try reducing the number of lags.")
    finally:
        plt.close(fig_pacf)

# === FORECASTING MODELS ===
st.header("🤖 Forecasting Models")

# Train/test split
test_size = st.slider(
    "Test set size (as % of data):",
    min_value=5,
    max_value=40,
    value=20,
    step=5,
)
split_point = int(len(ts) * (1 - test_size / 100))
train, test = ts[:split_point], ts[split_point:]

st.write(f"Training on {len(train)} points, testing on {len(test)} points.")

model_choice = st.selectbox(
    "Choose forecasting model:",
    ["Holt-Winters Exponential Smoothing", "ARIMA", "Prophet"],
)

# `fitter` takes a series and returns a fitted model. Keeping the configured
# recipe (not just the fitted object) lets the future forecast refit on the
# full series instead of reusing the train-only model.
if model_choice == "Holt-Winters Exponential Smoothing":
    seasonal_periods = int(st.number_input(
        "Seasonal periods:",
        min_value=2,
        value=period,
        step=1,
    ))
    fitter = partial(fit_holt_winters, seasonal_periods=seasonal_periods)
elif model_choice == "ARIMA":
    col1, col2, col3 = st.columns(3)
    p = col1.number_input("AR order (p):", min_value=0, max_value=5, value=1)
    d = col2.number_input("Differencing order (d):", min_value=0, max_value=2, value=1)
    q = col3.number_input("MA order (q):", min_value=0, max_value=5, value=1)
    fitter = partial(fit_arima, order=(int(p), int(d), int(q)))
else:
    fitter = partial(fit_prophet, freq=freq)

# Fit on the training part and forecast the test window
forecast = None
try:
    model = fitter(train)
    forecast = forecast_for(model, test.index)
except Exception as e:
    st.error(f"Could not fit {MODEL_LABELS[model_choice]} model: {e}")

# Show results if forecast exists
if forecast is not None:
    # Metrics
    mae = mean_absolute_error(test, forecast)
    mse = mean_squared_error(test, forecast)
    rmse = np.sqrt(mse)

    st.subheader("📈 Forecast Results")
    col1, col2, col3 = st.columns(3)
    col1.metric("MAE", f"{mae:.2f}")
    col2.metric("MSE", f"{mse:.2f}")
    col3.metric("RMSE", f"{rmse:.2f}")

    # Plot forecast vs actual
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=train.index, y=train, mode='lines',
        name='Training', line=dict(color='blue'),
    ))
    fig.add_trace(go.Scatter(
        x=test.index, y=test, mode='lines',
        name='Actual', line=dict(color='green'),
    ))
    fig.add_trace(go.Scatter(
        x=test.index, y=forecast, mode='lines+markers',
        name='Forecast', line=dict(color='red', dash='dash'),
    ))
    fig.update_layout(
        title=f"{model_choice} Forecast",
        xaxis_title="Date",
        yaxis_title=value_col,
        legend=dict(x=0, y=1),
    )
    st.plotly_chart(fig, use_container_width=True)

    # Allow forecasting into future
    st.subheader("🔮 Forecast Future Periods")
    future_periods = int(st.number_input(
        "Number of future periods to forecast:",
        min_value=1,
        max_value=365,
        value=30,
        step=1,
    ))

    if st.button("🚀 Generate Future Forecast"):
        try:
            # Refit on the whole series: a model fitted on `train` forecasts
            # from the end of the training window, so its values would not
            # belong to the dates that follow the last observation.
            full_model = fitter(ts)
            future_dates = future_index(ts.index, future_periods, freq)
            future_forecast = forecast_for(full_model, future_dates)

            # Plot future forecast
            fig_future = go.Figure()
            fig_future.add_trace(go.Scatter(
                x=ts.index, y=ts.values, mode='lines',
                name='Historical', line=dict(color='blue'),
            ))
            fig_future.add_trace(go.Scatter(
                x=future_dates, y=future_forecast, mode='lines+markers',
                name='Future Forecast', line=dict(color='red', dash='dash'),
            ))
            fig_future.update_layout(
                title="Future Forecast",
                xaxis_title="Date",
                yaxis_title=value_col,
            )
            st.plotly_chart(fig_future, use_container_width=True)

            # Show as table
            forecast_df = pd.DataFrame({
                'Date': future_dates,
                'Forecast': future_forecast,
            })
            with st.expander("📋 View Forecast Table"):
                st.dataframe(forecast_df)
        except Exception as e:
            st.error(f"Could not generate future forecast: {e}")

# Footer
st.markdown("---")
st.caption(f"© {AUTHOR} | License {LICENSE} | Contact: {EMAIL}")