# File size: 4,856 Bytes | c3e391e
import streamlit as st
import pandas as pd
import plotly.graph_objs as go
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
# Page-level setup: browser tab title, wide layout, and the main heading.
st.set_page_config(
    page_title="ARIMA Forecasting with Streamlit",
    layout="wide",
)
st.title("📈 Time Series Forecasting with ARIMA for Vegetable Prices")

# Sidebar controls. Everything created inside this `with` block is rendered in
# the sidebar; the values read here (CSV path and the ARIMA order p, d, q) are
# consumed by the loading and model-fitting steps further down the script.
with st.sidebar:
    st.header("User Configuration")
    file_path = st.text_input(
        "Enter the path to your CSV file",
        value='arima.csv',
    )
    # Order of the ARIMA model: p and q are limited to 0..5, d to 0..2,
    # and each one starts at 1.
    p = st.number_input(
        "ARIMA Parameter p (AR term)",
        min_value=0,
        max_value=5,
        value=1,
    )
    d = st.number_input(
        "ARIMA Parameter d (Differencing)",
        min_value=0,
        max_value=2,
        value=1,
    )
    q = st.number_input(
        "ARIMA Parameter q (MA term)",
        min_value=0,
        max_value=5,
        value=1,
    )
# Load and preprocess data.
#
# The CSV must provide three columns:
#   'Date'      - parsed as day-month-year (format '%d-%m-%Y')
#   'Commodity' - used to populate the commodity selector
#   'Average'   - the price series that is tested and modelled
#
# Every failure mode below ends in a readable st.error(...) followed by
# st.stop(), so the rest of the script never runs on missing or unusable data.
REQUIRED_COLUMNS = ('Date', 'Commodity', 'Average')

# Keep the try body down to the single call that touches the file system.
try:
    data = pd.read_csv(file_path)
except FileNotFoundError:
    st.error("Data file not found. Please check the file path and try again.")
    st.stop()
except (pd.errors.EmptyDataError, pd.errors.ParserError,
        UnicodeDecodeError, OSError) as exc:
    # Empty file, malformed CSV, wrong encoding, a directory instead of a
    # file, missing permissions, ...
    st.error(f"Could not read '{file_path}' as a CSV file: {exc}")
    st.stop()

# Fail early with a clear message instead of a KeyError further down.
missing_columns = [name for name in REQUIRED_COLUMNS if name not in data.columns]
if missing_columns:
    st.error(
        "The CSV file is missing required column(s): "
        + ", ".join(missing_columns)
    )
    st.stop()

# Unparseable dates / prices become NaT / NaN and are dropped just below.
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y', errors='coerce')
data['Average'] = pd.to_numeric(data['Average'], errors='coerce')

# Rows without a commodity are dropped as well: they could only show up as an
# unselectable entry in the commodity dropdown.
data = data.dropna(subset=['Date', 'Commodity', 'Average'])
if data.empty:
    st.error("The CSV file contains no rows with a valid Date, Commodity and Average.")
    st.stop()

commodities = data['Commodity'].unique()
# Commodity picker: a sidebar dropdown offering the values in `commodities`.
selected_commodity = st.sidebar.selectbox(
    "Select a Vegetable Commodity",
    commodities,
)

# Keep only the rows of the chosen commodity, ordered chronologically.
rows_for_commodity = data['Commodity'] == selected_commodity
commodity_data = data.loc[rows_for_commodity].sort_values(by='Date')

# Section heading followed by a preview of the first rows of the selection.
st.subheader(f"Data Overview and Stationarity Check for '{selected_commodity}'")
preview_rows = commodity_data.head()
st.write(preview_rows)
# Augmented Dickey-Fuller (ADF) stationarity check on the selected price series.
#
# This block reads three entries of the tuple returned by adfuller:
#   [0] test statistic, [1] p-value, [4] mapping of critical values.
# The series is reported as stationary when the p-value is below 0.05.
try:
    adf_result = adfuller(commodity_data['Average'])
except ValueError as exc:
    # adfuller raises ValueError for input it cannot test (for example too few
    # observations). The check is informational, so report it and carry on
    # instead of aborting the whole app.
    adf_result = None
    is_stationary = None
    st.warning(f"Stationarity check skipped for '{selected_commodity}': {exc}")

if adf_result is not None:
    is_stationary = adf_result[1] < 0.05

    # Display ADF test results
    with st.expander(f"Augmented Dickey-Fuller Test Results for '{selected_commodity}'", expanded=False):
        st.write(f"ADF Statistic: {adf_result[0]:.4f}")
        st.write(f"p-value: {adf_result[1]:.4f}")
        st.write("Critical Values:")
        for key, value in adf_result[4].items():
            st.write(f" {key}: {value:.4f}")

        verdict = f"The time series is {'stationary' if is_stationary else 'not stationary'} (p-value {'<' if is_stationary else '>='} 0.05)."
        # A green "success" box for a non-stationary result is misleading, so
        # that outcome is shown as a warning instead.
        if is_stationary:
            st.success(verdict)
        else:
            st.warning(verdict)
# ARIMA model fitting with the (p, d, q) order chosen in the sidebar.
st.subheader(f"ARIMA Model Fitting and Summary for '{selected_commodity}'")

# The sidebar widgets are configured with integer bounds; int() only makes the
# order tuple explicit about that.
arima_order = (int(p), int(d), int(q))

try:
    model = ARIMA(commodity_data['Average'], order=arima_order)
    model_fit = model.fit()
except ValueError as exc:
    # Presumably raised when the selected series cannot support the requested
    # order (e.g. very few observations). Show a readable message rather than
    # a traceback, and stop: everything below depends on `model_fit`.
    st.error(
        f"Could not fit ARIMA{arima_order} for '{selected_commodity}': {exc}"
    )
    st.stop()

# Display model summary
with st.expander("ARIMA Model Summary", expanded=False):
    st.write(model_fit.summary())
# Forecast future values up to December 31, 2025.
#
# The horizon is the number of calendar days between the last observed date of
# the selected commodity and the fixed end date; one forecast step is produced
# per day. NOTE(review): this treats the series as daily - confirm the input
# data has one observation per day.
last_date = commodity_data['Date'].max()
forecast_end_date = pd.to_datetime('2025-12-31')
forecast_periods = (forecast_end_date - last_date).days  # days until end of 2025

# If the data already reaches (or passes) the end date there is no horizon
# left; a non-positive step count cannot be forecast, so explain and stop.
if forecast_periods <= 0:
    st.warning(
        f"The data for '{selected_commodity}' already extends to "
        f"{last_date.date()}, so there is nothing left to forecast before "
        f"{forecast_end_date.date()}."
    )
    st.stop()

# Make forecast: point forecasts plus their interval bounds, and a matching
# date index that starts the day after the last observation.
forecast = model_fit.get_forecast(steps=forecast_periods)
forecast_index = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=forecast_periods)
forecast_values = forecast.predicted_mean
conf_int = forecast.conf_int()
# Interactive Plotly chart: observed prices, point forecast, and interval band.
st.subheader(f"Forecast Visualization for '{selected_commodity}' until {forecast_end_date.date()}")

# Observed prices of the selected commodity.
history_trace = go.Scatter(
    x=commodity_data['Date'],
    y=commodity_data['Average'],
    mode='lines+markers',
    name='Historical Data',
    line={'color': 'royalblue', 'width': 2},
)

# Forecasted prices, drawn dashed to set them apart from the history.
forecast_trace = go.Scatter(
    x=forecast_index,
    y=forecast_values,
    mode='lines+markers',
    name='Forecast',
    line={'color': 'red', 'width': 2, 'dash': 'dash'},
    hovertemplate='Date: %{x}<br>Price: %{y:.2f}<extra></extra>',
)

# Interval band as one closed polygon: first interval column left-to-right,
# then the second column right-to-left, filled with 'toself'.
band_lower = conf_int.iloc[:, 0]
band_upper = conf_int.iloc[:, 1]
band_x = forecast_index.tolist() + forecast_index[::-1].tolist()
band_y = band_lower.tolist() + band_upper[::-1].tolist()
band_trace = go.Scatter(
    x=band_x,
    y=band_y,
    fill='toself',
    fillcolor='rgba(173, 216, 230,0.2)',
    line={'color': 'rgba(255,255,255,0)'},
    name='Confidence Interval',
)

# Traces are supplied in the same order they were previously added.
forecast_fig = go.Figure(data=[history_trace, forecast_trace, band_trace])

# Titles, legend position, theme and hover behaviour.
layout_options = {
    'title': f"ARIMA Forecast for '{selected_commodity}' Prices until 2025",
    'xaxis_title': 'Date',
    'yaxis_title': 'Average Price (in Kg)',
    'legend': {'x': 0.01, 'y': 0.99},
    'template': 'plotly_white',
    'hovermode': 'x unified',
}
forecast_fig.update_layout(**layout_options)

# Render the chart at the full width of its container.
st.plotly_chart(forecast_fig, use_container_width=True)
# Tabular view of the forecast: one row per forecast date with the predicted
# price and the two interval bounds.
st.subheader(f"Forecasted Prices for '{selected_commodity}' until 2025")

lower_bound = conf_int.iloc[:, 0]
upper_bound = conf_int.iloc[:, 1]
table_columns = {
    'Date': forecast_index,
    'Forecasted Price': forecast_values,
    'Lower Confidence Interval': lower_bound,
    'Upper Confidence Interval': upper_bound,
}
forecast_table = pd.DataFrame(table_columns)
st.dataframe(forecast_table)

# Closing hint pointing the user back to the sidebar controls.
st.info("Adjust the ARIMA parameters in the sidebar to see different results.")
|