Spaces:
Running
Running
File size: 6,995 Bytes
32d870a 686c1e1 3cee5b7 92f4bb2 06412fb 32d870a 92f4bb2 686c1e1 92f4bb2 686c1e1 06412fb 3cee5b7 06412fb 3cee5b7 686c1e1 ec72f78 73852fc ec72f78 3cee5b7 92f4bb2 3cee5b7 06412fb 3cee5b7 06412fb 3cee5b7 686c1e1 3cee5b7 06412fb 3cee5b7 06412fb 3cee5b7 06412fb 3cee5b7 06412fb 3cee5b7 ec72f78 3cee5b7 686c1e1 ec72f78 06412fb ec72f78 06412fb ec72f78 06412fb 686c1e1 ec72f78 06412fb da5122f 06412fb ec72f78 da5122f ec72f78 32511bd ec72f78 3cee5b7 32511bd 686c1e1 06412fb 32511bd 686c1e1 06412fb da5122f 06412fb ec72f78 06412fb ec72f78 da5122f 3cee5b7 06412fb 3cee5b7 da5122f 686c1e1 3cee5b7 06412fb 3cee5b7 06412fb 3cee5b7 06412fb 3cee5b7 06412fb b62199f ec72f78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import streamlit as st
import pandas as pd
import torch
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
from models import ModelConfig, load_model_pipeline
from data import DataConfig, process_input, fetch_data_with_fallback
# Load the forecasting model pipeline
@st.cache_resource
def load_pipeline(model_name):
"""Load and cache the model pipeline"""
return load_model_pipeline(model_name, device_map="cpu", dtype=torch.float32)
# Fetch data with caching
@st.cache_data(ttl=3600) # Cache for 1 hour
def fetch_data(source_name, days_back=180):
"""Fetch data from specified source with caching"""
return fetch_data_with_fallback(source_name, days_back)
# Streamlit app interface
st.title("⚡ Electricity Market Price Forecasting")
st.write("""
This application uses **Amazon Chronos** pretrained models for zero-shot time series forecasting on electricity market data.
Select a model, choose your data source, and evaluate forecasting performance with backtesting on real ERCOT prices.
""")
# Model selection
available_model_names = ModelConfig.get_model_names()
selected_model_name = st.selectbox(
"Select Forecasting Model:",
options=available_model_names,
index=0 # Default to first model (Chronos-2)
)
# Load the selected model
with st.spinner(f"Loading {selected_model_name}..."):
pipeline = load_pipeline(selected_model_name)
# Data source selection
available_sources = DataConfig.get_source_names()
data_source = st.radio(
"Select Data Source:",
available_sources + ["Custom Data"],
index=0
)
# Input field for user-provided data
if data_source == "Custom Data":
user_input = st.text_area(
"Enter time series data (comma-separated values):",
"",
height=150
)
data_source_used = "Custom"
error_msg = None
else:
# Fetch data from selected source
with st.spinner(f"Fetching data from {data_source}..."):
default_data, data_source_used, error_msg = fetch_data(data_source)
if error_msg:
st.warning(f"⚠️ {error_msg}\nUsing sample data instead.")
user_input = st.text_area(
f"{data_source_used} - Daily Average Prices ($/MWh):",
default_data.strip(),
height=150
)
if "ERCOT" in data_source_used:
st.info("💡 Live data from ERCOT's Day-Ahead Market (DAM SPP) - Daily average prices across all settlement points")
try:
time_series_data = process_input(user_input)
except ValueError:
st.error("Please make sure all values are numbers, separated by commas.")
time_series_data = [] # Set empty data on error to prevent further processing
# Select the forecast window for backtesting
max_test_days = min(64, len(time_series_data) - 10) if len(time_series_data) > 10 else 1
prediction_length = st.slider(
"Forecast Horizon (Days to Backtest)",
min_value=1,
max_value=max_test_days,
value=min(14, max_test_days),
help="The model will use historical context to forecast the last N days, then compare predictions with actual values to evaluate performance."
)
# If data is valid, perform the forecast
if time_series_data:
# Split data into context (historical) and test
context_length = len(time_series_data) - prediction_length
context_data = time_series_data[:context_length]
test_data = time_series_data[context_length:]
# Create timestamps
end_date = datetime.now()
start_date = end_date - timedelta(days=len(time_series_data) - 1)
all_dates = pd.date_range(start=start_date, periods=len(time_series_data), freq='D')
context_dates = all_dates[:context_length]
test_dates = all_dates[context_length:]
# Create a DataFrame with context for the model
context_df = pd.DataFrame({
'timestamp': context_dates,
'target': context_data,
'id': 'ercot_prices'
})
# Make the forecast using the model
with st.spinner("Generating forecast..."):
pred_df = pipeline.predict_df(
context_df,
prediction_length=prediction_length,
quantile_levels=[0.1, 0.5, 0.9],
id_column="id",
timestamp_column="timestamp",
target="target",
)
# Extract predictions
median = pred_df["predictions"].values
low = pred_df["0.1"].values
high = pred_df["0.9"].values
# Calculate error metrics
mae = np.mean(np.abs(np.array(test_data) - median))
mape = np.mean(np.abs((np.array(test_data) - median) / np.array(test_data))) * 100
rmse = np.sqrt(np.mean((np.array(test_data) - median) ** 2))
# Plot the historical and forecasted data with dates
plt.figure(figsize=(14, 7))
plt.plot(context_dates, context_data, color="royalblue", label="Historical Context", linewidth=2)
plt.plot(test_dates, test_data, color="green", label="Actual Values", linewidth=2, marker='o', markersize=4)
plt.plot(test_dates, median, color="tomato", label="Forecast", linewidth=2, linestyle='--', marker='s', markersize=4)
plt.fill_between(test_dates, low, high, color="tomato", alpha=0.3, label="80% Prediction Interval")
plt.axvline(x=context_dates[-1], color='gray', linestyle=':', linewidth=1, alpha=0.7)
plt.text(context_dates[-1], plt.ylim()[1]*0.95, ' Forecast Window', fontsize=10, color='gray')
plt.xlabel("Date")
plt.ylabel("Price ($/MWh)")
plt.title(f"ERCOT Electricity Price Forecast - {prediction_length} Day Test Window")
plt.legend(loc='best')
plt.grid(alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
# Show the plot in the Streamlit app
st.pyplot(plt)
# Display forecast statistics and error metrics
st.write("### Model Performance Metrics")
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("MAE", f"${mae:.2f}")
with col2:
st.metric("RMSE", f"${rmse:.2f}")
with col3:
st.metric("MAPE", f"{mape:.2f}%")
with col4:
st.metric("Avg Actual", f"${np.mean(test_data):.2f}/MWh")
# Show detailed comparison table
with st.expander("View Detailed Comparison"):
comparison_df = pd.DataFrame({
'Date': test_dates.strftime('%Y-%m-%d'),
'Actual': test_data,
'Forecast': median.round(2),
'Error': (median - np.array(test_data)).round(2),
'Error %': ((median - np.array(test_data)) / np.array(test_data) * 100).round(2)
})
st.dataframe(comparison_df, use_container_width=True)
# Note for comments, feedback, or questions
st.write("### About")
st.write("""
**Features:**
- 🤖 Multiple Chronos models (Chronos-2 and T5 variants)
- 📊 Real-time ERCOT electricity market data
- 🎯 Backtesting with error metrics (MAE, RMSE, MAPE)
- 📈 Visual comparison of forecasts vs actual values
- 🔧 Modular architecture for easy extension
For questions or feedback, reach out on [LinkedIn](https://www.linkedin.com/in/javadbayazi/).
""") |