# TSF-EM / data.py
# JavadBayazi's picture
# Add modular data architecture and backtesting features
# 06412fb
"""
Data fetching and processing for electricity market price forecasting.
Handles data retrieval from various sources and preprocessing.
"""
import pandas as pd
from datetime import datetime, timedelta
from gridstatus import Ercot
class DataSource:
    """Common interface that every price data source implements."""

    def fetch_data(self, days_back=180):
        """
        Retrieve price data from this source.

        This base implementation is a placeholder; concrete sources
        override it.

        Args:
            days_back: How many days of history the caller wants
                (default: 180)

        Returns:
            In subclasses, a comma-separated string of prices.

        Raises:
            NotImplementedError: Always, when called on the base class.
        """
        raise NotImplementedError
class ERCOTDataSource(DataSource):
    """Fetch electricity price data from ERCOT.

    Prices are read from the day-ahead market settlement point price
    table returned by ``Ercot.get_dam_spp`` and averaged per calendar
    day over every row of that table.
    """

    def __init__(self):
        self.name = "ERCOT (Texas)"
        self.description = "Electric Reliability Council of Texas - Day-Ahead Market"

    def fetch_data(self, days_back=180):
        """
        Fetch the most recent daily average ERCOT day-ahead prices.

        Every calendar year touched by the look-back window is fetched,
        so a request made early in the year is not cut down to the few
        days available for the current year.

        Args:
            days_back: Maximum number of most recent days to return
                (default: 180). Must be at least 1.

        Returns:
            Comma-separated string of daily average prices, oldest
            first, each rounded to two decimals.

        Raises:
            ValueError: If days_back is less than 1.
            Exception: If the data cannot be fetched or processed; the
                underlying error is chained as the cause.
        """
        # Reject values that would otherwise yield an empty string (0) or
        # silently drop the oldest rows instead of the newest (negative).
        if days_back < 1:
            raise ValueError(f"days_back must be at least 1, got {days_back}")

        try:
            ercot = Ercot()
            today = datetime.now()
            window_start = today - timedelta(days=days_back)

            # Year-to-date data alone is too short when the window reaches
            # back into a previous year, so fetch each year it touches.
            frames = [
                ercot.get_dam_spp(year=year)
                for year in range(window_start.year, today.year + 1)
            ]
            df = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)

            # Average price per day across all locations. Grouping on a
            # computed series avoids adding a column to the fetched frame.
            dates = pd.to_datetime(df['Interval Start']).dt.date
            daily_prices = df.groupby(dates)['SPP'].mean()

            # Keep only the last N days (tail returns everything when
            # fewer than N days are available).
            daily_prices = daily_prices.tail(days_back)

            # Convert to comma-separated string
            price_list = daily_prices.round(2).tolist()
            return ", ".join(map(str, price_list))
        except Exception as e:
            raise Exception(f"Could not fetch ERCOT data: {e}") from e
class SampleDataSource(DataSource):
    """Fallback source that serves a fixed, built-in price series."""

    # Raw text of the bundled series; surrounding whitespace is stripped
    # when the data is served.
    _SAMPLE_TEXT = """
25.50, 24.80, 26.30, 23.90, 25.10, 27.20, 28.50, 26.70, 24.30, 23.80, 25.40, 26.10, 27.80, 29.20, 28.40,
26.90, 25.30, 24.70, 26.50, 28.10, 29.60, 31.20, 30.50, 28.80, 27.10, 25.90, 27.30, 28.70, 30.20, 32.10,
31.40, 29.70, 28.20, 26.80, 28.40, 29.80, 31.50, 33.20, 32.60, 30.90, 29.30, 27.80, 29.40, 30.90, 32.70,
34.50, 33.80, 32.10, 30.50, 28.90, 30.50, 32.10, 33.90, 35.80, 35.10, 33.30, 31.60, 30.10, 31.70, 33.40,
35.20, 37.10, 36.40, 34.60, 32.90, 31.30, 32.90, 34.60, 36.50, 38.40, 37.70, 35.80, 34.10, 32.50, 34.20,
35.90, 37.80, 39.80, 39.10, 37.10, 35.40, 33.70, 35.40, 37.20, 39.20, 41.20, 40.50, 38.50, 36.70, 35.00,
36.70, 38.50, 40.60, 42.60, 41.90, 39.90, 38.00, 36.30, 38.00, 39.90, 42.00, 44.10, 43.40, 41.30, 39.40
"""

    def __init__(self):
        self.description = "Sample electricity price data for demonstration"
        self.name = "Sample Data"

    def fetch_data(self, days_back=180):
        """
        Serve the built-in sample electricity prices.

        Args:
            days_back: Accepted so the signature matches the other
                sources; the value is not used and the whole series is
                always returned.

        Returns:
            Comma-separated string of sample prices
        """
        return self._SAMPLE_TEXT.strip()
class DataConfig:
    """Registry of the data sources that can be selected by name"""

    # Display label -> data source class instantiated on request
    AVAILABLE_SOURCES = {
        "Live ERCOT Data (Last 180 Days)": ERCOTDataSource,
        "Sample Data": SampleDataSource,
    }

    @classmethod
    def get_source_names(cls):
        """Return the registered display labels as a list"""
        return [label for label in cls.AVAILABLE_SOURCES]

    @classmethod
    def get_source(cls, source_name):
        """
        Build the data source registered under a display label.

        Args:
            source_name: Display label of the wanted data source

        Returns:
            A new instance of the registered data source class

        Raises:
            ValueError: If no source is registered under source_name
        """
        registry = cls.AVAILABLE_SOURCES
        if source_name not in registry:
            raise ValueError(f"Unknown data source: {source_name}")
        return registry[source_name]()
def process_input(input_str):
    """
    Parse a comma-separated string of numbers into floats.

    Whitespace around each entry is ignored, and entries that are empty
    after trimming (for example from a trailing comma) are skipped.

    Args:
        input_str: Comma-separated string of numbers

    Returns:
        List of float values, in input order
    """
    values = []
    for token in input_str.split(","):
        cleaned = token.strip()
        if cleaned:
            values.append(float(cleaned))
    return values
def fetch_data_with_fallback(source_name, days_back=180):
    """
    Fetch prices from the named source, using sample data if that fails.

    Any exception raised while looking up or querying the requested
    source is caught, and the built-in sample data is served instead.

    Args:
        source_name: Name of the data source
        days_back: Number of days to fetch

    Returns:
        Tuple of (data_string, source_used, error_message); error_message
        is None on success and the text of the caught exception when the
        sample data was substituted
    """
    try:
        selected = DataConfig.get_source(source_name)
        outcome = (selected.fetch_data(days_back), selected.name, None)
    except Exception as exc:
        # Requested source unavailable: serve the sample series and report why
        fallback = SampleDataSource()
        outcome = (fallback.fetch_data(), fallback.name, str(exc))
    return outcome