""" fetch_weather.py — Open-Meteo API fetcher for historical weather data. Downloads 7-day rolling windows of weather variables aligned to each image timestamp. Open-Meteo is free and requires no API key. Falls back to realistic synthetic data if the API is unavailable. """ import logging from datetime import datetime, timedelta from typing import Optional, Tuple import numpy as np import pandas as pd import requests from src.training.config import ( OPEN_METEO_URL, WEATHER_RAW_DIR, TIMESERIES_DIR, TIMESERIES_WINDOW, TIMESERIES_FEATURES, NUM_SYNTHETIC_SAMPLES, ) logger = logging.getLogger(__name__) def fetch_weather_data( latitude: float = 37.5, longitude: float = -120.3, start_date: str = "2024-06-01", end_date: str = "2024-06-08", ) -> Optional[pd.DataFrame]: """ Fetch historical weather data from Open-Meteo archive API. Variables fetched: - temperature_2m (°C) - relativehumidity_2m (%) - windspeed_10m (km/h) - winddirection_10m (°) - precipitation (mm) Args: latitude: Location latitude. longitude: Location longitude. start_date: Start date (YYYY-MM-DD). end_date: End date (YYYY-MM-DD). Returns: DataFrame with daily weather data, or None on failure. """ params = { "latitude": latitude, "longitude": longitude, "start_date": start_date, "end_date": end_date, "daily": "temperature_2m_mean,relative_humidity_2m_mean," "windspeed_10m_max,winddirection_10m_dominant," "precipitation_sum", "timezone": "auto", } logger.info(f"Fetching Open-Meteo weather for ({latitude}, {longitude}) " f"from {start_date} to {end_date}") try: response = requests.get(OPEN_METEO_URL, params=params, timeout=30) response.raise_for_status() data = response.json() if "daily" not in data: logger.warning("No 'daily' key in Open-Meteo response.") return None daily = data["daily"] df = pd.DataFrame({ "date": pd.to_datetime(daily["time"]), "temperature": daily.get("temperature_2m_mean", [None] * len(daily["time"])), "humidity": daily.get("relative_humidity_2m_mean", [None] * len(daily["time"])), "wind_speed": daily.get("windspeed_10m_max", [None] * len(daily["time"])), "wind_direction": daily.get("winddirection_10m_dominant", [None] * len(daily["time"])), "precipitation": daily.get("precipitation_sum", [None] * len(daily["time"])), }) # Forward fill + interpolate missing values df = df.ffill().bfill() df = df.fillna(0.0) logger.info(f"Fetched {len(df)} days of weather data.") return df except requests.RequestException as e: logger.error(f"Open-Meteo API request failed: {e}") return None except (KeyError, ValueError) as e: logger.error(f"Error parsing Open-Meteo response: {e}") return None def generate_synthetic_weather( num_samples: int = NUM_SYNTHETIC_SAMPLES, window: int = TIMESERIES_WINDOW, ) -> np.ndarray: """ Generate realistic synthetic weather time series. Creates weather sequences with realistic value ranges and temporal autocorrelation to mimic real weather patterns. Args: num_samples: Number of 7-day sequences. window: Length of each sequence in days. Returns: Array of shape (num_samples, window, 5) — 5 weather features. """ logger.info(f"Generating {num_samples} synthetic weather sequences...") data = np.zeros((num_samples, window, 5), dtype=np.float32) for i in range(num_samples): # Temperature: 15–45°C with daily variation (fire-season ranges) base_temp = np.random.uniform(20, 40) data[i, :, 0] = base_temp + np.cumsum(np.random.randn(window) * 1.5) data[i, :, 0] = np.clip(data[i, :, 0], 10, 50) # Humidity: 10–90% base_hum = np.random.uniform(20, 60) data[i, :, 1] = base_hum + np.cumsum(np.random.randn(window) * 3) data[i, :, 1] = np.clip(data[i, :, 1], 5, 95) # Wind speed: 0–60 km/h data[i, :, 2] = np.abs(np.random.randn(window) * 10 + 15) data[i, :, 2] = np.clip(data[i, :, 2], 0, 60) # Wind direction: 0–360° base_dir = np.random.uniform(0, 360) data[i, :, 3] = (base_dir + np.cumsum(np.random.randn(window) * 20)) % 360 # Precipitation: mostly 0, occasionally 0–20mm data[i, :, 4] = np.maximum(0, np.random.randn(window) * 2 - 1) if np.random.rand() > 0.5: # 50% chance of dry period data[i, :, 4] = 0 return data def fetch_weather_for_location( latitude: float, longitude: float, target_date: str, window: int = TIMESERIES_WINDOW, save: bool = False, ) -> np.ndarray: """ Fetch a 7-day weather window ending at target_date. Returns: Array of shape (window, 5). """ try: end_dt = datetime.strptime(target_date, "%Y-%m-%d") start_dt = end_dt - timedelta(days=window - 1) df = fetch_weather_data( latitude=latitude, longitude=longitude, start_date=start_dt.strftime("%Y-%m-%d"), end_date=end_dt.strftime("%Y-%m-%d"), ) if df is not None and len(df) >= window: cols = ["temperature", "humidity", "wind_speed", "wind_direction", "precipitation"] arr = df[cols].values[-window:].astype(np.float32) if save: np.save(WEATHER_RAW_DIR / f"weather_{target_date}.npy", arr) return arr except Exception as e: logger.error(f"Error fetching weather data: {e}") # Fallback to synthetic logger.warning("Falling back to synthetic weather data.") return generate_synthetic_weather(num_samples=1, window=window)[0] def fetch_and_prepare_weather( num_samples: int = NUM_SYNTHETIC_SAMPLES, save: bool = True, ) -> np.ndarray: """ Generate or fetch complete weather dataset for training. Returns: Array of shape (num_samples, TIMESERIES_WINDOW, 5). """ weather_data = generate_synthetic_weather(num_samples=num_samples) if save: np.save(TIMESERIES_DIR / "weather_sequences.npy", weather_data) logger.info(f"Saved weather data: {weather_data.shape}") return weather_data if __name__ == "__main__": logging.basicConfig(level=logging.INFO) data = fetch_and_prepare_weather() print(f"Weather data shape: {data.shape}")