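# arima / app.py
# Repository page header captured along with the file (kept as comments so the
# module stays valid Python): uploader "ojas121", commit c3e391e (verified),
# commit message "Create app.py".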
import streamlit as st
import pandas as pd
import plotly.graph_objs as go
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
# Page-wide settings and the main heading.
st.set_page_config(layout="wide", page_title="ARIMA Forecasting with Streamlit")
st.title("📈 Time Series Forecasting with ARIMA for Vegetable Prices")

# Sidebar: data source plus the ARIMA (p, d, q) order chosen by the user.
sidebar = st.sidebar
sidebar.header("User Configuration")
file_path = sidebar.text_input("Enter the path to your CSV file", 'arima.csv')

# (widget label, largest allowed value) for p, d and q in that order.
# Every term has a floor of 0 and starts at 1.
_order_widget_specs = (
    ("ARIMA Parameter p (AR term)", 5),
    ("ARIMA Parameter d (Differencing)", 2),
    ("ARIMA Parameter q (MA term)", 5),
)
p, d, q = (
    sidebar.number_input(label, min_value=0, max_value=upper_limit, value=1)
    for label, upper_limit in _order_widget_specs
)
# Load and preprocess data.
#
# Produces:
#   data        -- rows with a parseable Date (dd-mm-YYYY), a numeric Average
#                  and a non-null Commodity
#   commodities -- the distinct Commodity values found in `data`
#
# Every failure mode is reported in the page and the script run is halted,
# instead of surfacing as an unhandled exception further down.
REQUIRED_COLUMNS = ('Date', 'Average', 'Commodity')

try:
    data = pd.read_csv(file_path)
except FileNotFoundError:
    st.error("Data file not found. Please check the file path and try again.")
    st.stop()
except (pd.errors.EmptyDataError, pd.errors.ParserError, OSError) as exc:
    # Empty/malformed CSV, or a path that exists but cannot be read.
    st.error(f"Could not read the data file: {exc}")
    st.stop()

missing_columns = [name for name in REQUIRED_COLUMNS if name not in data.columns]
if missing_columns:
    st.error(f"Data file is missing required column(s): {', '.join(missing_columns)}")
    st.stop()

# Unparseable dates and non-numeric prices become NaT/NaN and are dropped below.
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y', errors='coerce')
data['Average'] = pd.to_numeric(data['Average'], errors='coerce')
data = data.dropna(subset=['Date', 'Average', 'Commodity'])

if data.empty:
    st.error("No usable rows found in the data file after cleaning.")
    st.stop()

commodities = data['Commodity'].unique()
# Let the user pick one commodity from the sidebar.
selected_commodity = st.sidebar.selectbox("Select a Vegetable Commodity", commodities)

# Keep only that commodity's rows, ordered chronologically.
is_selected = data['Commodity'] == selected_commodity
commodity_data = data.loc[is_selected].sort_values(by='Date')

# Section heading followed by a preview of the first rows.
st.subheader(f"Data Overview and Stationarity Check for '{selected_commodity}'")
preview_rows = commodity_data.head()
st.write(preview_rows)
# Perform the Augmented Dickey-Fuller (ADF) test on the selected price series.
# adfuller raises ValueError for inputs it cannot test (e.g. a constant series
# or too few observations); report that in the page instead of crashing.
try:
    adf_result = adfuller(commodity_data['Average'])
except ValueError as exc:
    st.error(f"Unable to run the ADF test for '{selected_commodity}': {exc}")
    st.stop()

# adf_result[0] is the test statistic, [1] the p-value and [4] the critical
# values; the series is treated as stationary at the 5% level.
is_stationary = adf_result[1] < 0.05

# Display ADF test results
with st.expander(f"Augmented Dickey-Fuller Test Results for '{selected_commodity}'", expanded=False):
    st.write(f"ADF Statistic: {adf_result[0]:.4f}")
    st.write(f"p-value: {adf_result[1]:.4f}")
    st.write("Critical Values:")
    for key, value in adf_result[4].items():
        st.write(f" {key}: {value:.4f}")

    # NOTE(review): the original indentation was lost, so placing the verdict
    # inside the expander is an assumption -- confirm against the author's intent.
    verdict = (
        f"The time series is {'stationary' if is_stationary else 'not stationary'} "
        f"(p-value {'<' if is_stationary else '>='} 0.05)."
    )
    # A non-stationary outcome is not a "success"; use a warning box for it.
    if is_stationary:
        st.success(verdict)
    else:
        st.warning(verdict)
# Fit an ARIMA model of the user-chosen order to the selected price series.
st.subheader(f"ARIMA Model Fitting and Summary for '{selected_commodity}'")
arima_order = (p, d, q)
price_series = commodity_data['Average']
model = ARIMA(price_series, order=arima_order)
model_fit = model.fit()

# The fitted model's summary is tucked into a collapsed expander.
summary_panel = st.expander("ARIMA Model Summary", expanded=False)
with summary_panel:
    st.write(model_fit.summary())
# Forecast future values up to December 31, 2025.
#
# Produces: forecast_periods, forecast, forecast_index, forecast_values and
# conf_int for the sections that plot and tabulate the forecast.
last_date = commodity_data['Date'].max()
forecast_end_date = pd.to_datetime('2025-12-31')

# Without a valid last observation date there is nothing to anchor the horizon to.
if pd.isna(last_date):
    st.warning(f"No valid dates are available for '{selected_commodity}'; cannot forecast.")
    st.stop()

# Number of whole days between the last observation and the end date.
# NOTE(review): one forecast step is generated per calendar day, which assumes
# the observations are daily -- confirm against the data's actual frequency.
forecast_periods = (forecast_end_date - last_date).days

# A zero or negative horizon (data already reaches the end date) would make
# get_forecast / date_range raise, so stop with an explanation instead.
if forecast_periods < 1:
    st.warning(
        f"The data for '{selected_commodity}' already extends to {last_date.date()}, "
        f"so there is nothing left to forecast before {forecast_end_date.date()}."
    )
    st.stop()

# Make forecast
forecast = model_fit.get_forecast(steps=forecast_periods)
forecast_index = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=forecast_periods)
forecast_values = forecast.predicted_mean
conf_int = forecast.conf_int()
# Interactive Plotly chart: history, forecast and the forecast's interval band.
st.subheader(f"Forecast Visualization for '{selected_commodity}' until {forecast_end_date.date()}")

# Observed prices.
history_trace = go.Scatter(
    x=commodity_data['Date'],
    y=commodity_data['Average'],
    mode='lines+markers',
    name='Historical Data',
    line={'color': 'royalblue', 'width': 2},
)

# Point forecasts, drawn dashed with a custom hover label.
forecast_trace = go.Scatter(
    x=forecast_index,
    y=forecast_values,
    mode='lines+markers',
    name='Forecast',
    line={'color': 'red', 'width': 2, 'dash': 'dash'},
    hovertemplate='Date: %{x}<br>Price: %{y:.2f}<extra></extra>',
)

# The band is a closed polygon: first interval column left-to-right, then the
# second interval column right-to-left, filled back onto itself.
band_dates = forecast_index.tolist() + forecast_index[::-1].tolist()
band_lower = conf_int.iloc[:, 0].tolist()
band_upper_reversed = conf_int.iloc[:, 1][::-1].tolist()
interval_trace = go.Scatter(
    x=band_dates,
    y=band_lower + band_upper_reversed,
    fill='toself',
    fillcolor='rgba(173, 216, 230,0.2)',
    line={'color': 'rgba(255,255,255,0)'},
    name='Confidence Interval',
)

# Traces are added in the same order as before: history, forecast, band.
fig = go.Figure(data=[history_trace, forecast_trace, interval_trace])

# Titles, legend position and theme.
layout_options = {
    'title': f"ARIMA Forecast for '{selected_commodity}' Prices until 2025",
    'xaxis_title': 'Date',
    'yaxis_title': 'Average Price (in Kg)',
    'legend': {'x': 0.01, 'y': 0.99},
    'template': 'plotly_white',
    'hovermode': 'x unified',
}
fig.update_layout(**layout_options)

# Render the chart at the full width of its container.
st.plotly_chart(fig, use_container_width=True)
# Tabulate the forecast: date, point forecast and both interval bounds.
st.subheader(f"Forecasted Prices for '{selected_commodity}' until 2025")

# Start from the dates and point forecasts, then attach the two interval
# columns; the Series are aligned on the index they share with the forecast.
forecast_table = pd.DataFrame({
    'Date': forecast_index,
    'Forecasted Price': forecast_values,
})
forecast_table['Lower Confidence Interval'] = conf_int.iloc[:, 0]
forecast_table['Upper Confidence Interval'] = conf_int.iloc[:, 1]

st.dataframe(forecast_table)

# Closing hint for the user.
st.info("Adjust the ARIMA parameters in the sidebar to see different results.")