| """ |
| fetch_weather.py — Open-Meteo API fetcher for historical weather data. |
| |
| Downloads 7-day rolling windows of weather variables aligned to each |
| image timestamp. Open-Meteo is free and requires no API key. |
| Falls back to realistic synthetic data if the API is unavailable. |
| """ |
|
|
| import logging |
| from datetime import datetime, timedelta |
| from typing import Optional, Tuple |
|
|
| import numpy as np |
| import pandas as pd |
| import requests |
|
|
| from src.training.config import ( |
| OPEN_METEO_URL, WEATHER_RAW_DIR, TIMESERIES_DIR, |
| TIMESERIES_WINDOW, TIMESERIES_FEATURES, NUM_SYNTHETIC_SAMPLES, |
| ) |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
def fetch_weather_data(
    latitude: float = 37.5,
    longitude: float = -120.3,
    start_date: str = "2024-06-01",
    end_date: str = "2024-06-08",
) -> Optional[pd.DataFrame]:
    """
    Fetch daily historical weather data from the Open-Meteo archive API.

    The following daily aggregates are requested and mapped onto fixed
    column names in the returned frame:
        - temperature_2m_mean         -> "temperature"
        - relative_humidity_2m_mean   -> "humidity"
        - windspeed_10m_max           -> "wind_speed"
        - winddirection_10m_dominant  -> "wind_direction"
        - precipitation_sum           -> "precipitation"

    Feature columns are coerced to numeric. Missing values are
    forward-filled, then back-filled, and any value still missing (for
    example a variable absent from the response) becomes 0.0.

    Args:
        latitude: Location latitude.
        longitude: Location longitude.
        start_date: Start date (YYYY-MM-DD).
        end_date: End date (YYYY-MM-DD).

    Returns:
        DataFrame with a "date" column plus the five feature columns,
        one row per day, or None if the request fails or the response
        cannot be parsed.
    """
    # Output column name -> Open-Meteo daily variable name. The insertion
    # order also defines the order of the comma-separated "daily" parameter.
    feature_columns = {
        "temperature": "temperature_2m_mean",
        "humidity": "relative_humidity_2m_mean",
        "wind_speed": "windspeed_10m_max",
        "wind_direction": "winddirection_10m_dominant",
        "precipitation": "precipitation_sum",
    }

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "daily": ",".join(feature_columns.values()),
        "timezone": "auto",
    }

    logger.info(f"Fetching Open-Meteo weather for ({latitude}, {longitude}) "
                f"from {start_date} to {end_date}")

    try:
        response = requests.get(OPEN_METEO_URL, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        if "daily" not in data:
            logger.warning("No 'daily' key in Open-Meteo response.")
            return None

        daily = data["daily"]
        dates = daily["time"]
        num_days = len(dates)

        columns = {"date": pd.to_datetime(dates)}
        for column, api_name in feature_columns.items():
            values = daily.get(api_name)
            # A variable that is absent (or null) becomes an all-missing
            # column so the frame always has the same schema.
            columns[column] = [None] * num_days if values is None else values
        df = pd.DataFrame(columns)

        feature_names = list(feature_columns)
        for column in feature_names:
            # Nulls and non-numeric entries become NaN; this also guarantees
            # a float dtype even when every value in the column is missing.
            df[column] = pd.to_numeric(df[column], errors="coerce")

        # Fill gaps from neighbouring days first; 0.0 is the last resort for
        # columns with no valid value at all. Dates are left untouched.
        df[feature_names] = df[feature_names].ffill().bfill().fillna(0.0)

        logger.info(f"Fetched {len(df)} days of weather data.")
        return df

    except requests.RequestException as e:
        logger.error(f"Open-Meteo API request failed: {e}")
        return None
    except (KeyError, ValueError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover structurally unexpected payloads
        # (e.g. a null "time" list or a non-dict "daily" section).
        logger.error(f"Error parsing Open-Meteo response: {e}")
        return None
|
|
|
|
def generate_synthetic_weather(
    num_samples: int = NUM_SYNTHETIC_SAMPLES,
    window: int = TIMESERIES_WINDOW,
) -> np.ndarray:
    """
    Build random weather sequences to use when real data is unavailable.

    Temperature, humidity and wind direction follow a random walk from a
    random starting value, so consecutive days are correlated. Wind speed
    and precipitation are drawn independently for each day. All draws use
    NumPy's global random state.

    Feature layout along the last axis (same order as the columns used by
    fetch_weather_for_location):
        0: temperature, clipped to [10, 50]
        1: humidity, clipped to [5, 95]
        2: wind speed, clipped to [0, 60]
        3: wind direction, wrapped modulo 360
        4: precipitation, non-negative; entirely zero when a per-sample
           uniform draw exceeds 0.5

    Args:
        num_samples: Number of sequences to generate.
        window: Length of each sequence in days.

    Returns:
        float32 array of shape (num_samples, window, 5).
    """
    logger.info(f"Generating {num_samples} synthetic weather sequences...")
    sequences = np.zeros((num_samples, window, 5), dtype=np.float32)

    # Each `seq` is a (window, 5) view into `sequences`, so writes land in
    # the output array. The order of random draws below is significant for
    # reproducibility under a seeded global RNG.
    for seq in sequences:
        # Temperature: random walk around a warm baseline.
        temp_start = np.random.uniform(20, 40)
        temp_walk = np.cumsum(np.random.randn(window) * 1.5)
        seq[:, 0] = np.clip(temp_start + temp_walk, 10, 50)

        # Humidity: random walk with larger daily steps.
        hum_start = np.random.uniform(20, 60)
        hum_walk = np.cumsum(np.random.randn(window) * 3)
        seq[:, 1] = np.clip(hum_start + hum_walk, 5, 95)

        # Wind speed: independent daily draws, folded to be non-negative.
        gusts = np.abs(np.random.randn(window) * 10 + 15)
        seq[:, 2] = np.clip(gusts, 0, 60)

        # Wind direction: drifting heading wrapped into [0, 360).
        heading_start = np.random.uniform(0, 360)
        heading_walk = np.cumsum(np.random.randn(window) * 20)
        seq[:, 3] = (heading_start + heading_walk) % 360

        # Precipitation: mostly zero with occasional rain; some sequences
        # are completely dry.
        rain = np.maximum(0, np.random.randn(window) * 2 - 1)
        fully_dry = np.random.rand() > 0.5
        seq[:, 4] = 0 if fully_dry else rain

    return sequences
|
|
|
|
def fetch_weather_for_location(
    latitude: float,
    longitude: float,
    target_date: str,
    window: int = TIMESERIES_WINDOW,
    save: bool = False,
) -> np.ndarray:
    """
    Fetch a `window`-day weather sequence ending at target_date.

    Requests `window` consecutive days (inclusive of target_date) via
    fetch_weather_data. If the request fails, returns fewer than `window`
    days, or any other error occurs, a synthetic sequence is returned
    instead so callers always receive an array.

    Args:
        latitude: Location latitude.
        longitude: Location longitude.
        target_date: Last day of the window (YYYY-MM-DD).
        window: Number of days in the window.
        save: If True, also write the fetched window to WEATHER_RAW_DIR as
            weather_<target_date>.npy. A failed write is logged and does
            not discard the fetched data.

    Returns:
        float32 array of shape (window, 5) with columns temperature,
        humidity, wind_speed, wind_direction, precipitation.
    """
    try:
        end_dt = datetime.strptime(target_date, "%Y-%m-%d")
        start_dt = end_dt - timedelta(days=window - 1)

        df = fetch_weather_data(
            latitude=latitude,
            longitude=longitude,
            start_date=start_dt.strftime("%Y-%m-%d"),
            end_date=end_dt.strftime("%Y-%m-%d"),
        )

        if df is not None and len(df) >= window:
            cols = ["temperature", "humidity", "wind_speed", "wind_direction", "precipitation"]
            arr = df[cols].values[-window:].astype(np.float32)
            if save:
                # Persisting is best-effort: an I/O failure (e.g. missing
                # directory) must not throw away real data in favour of the
                # synthetic fallback below.
                try:
                    np.save(WEATHER_RAW_DIR / f"weather_{target_date}.npy", arr)
                except OSError as e:
                    logger.warning(f"Could not save weather window for {target_date}: {e}")
            return arr
    except Exception as e:
        # Deliberate catch-all: this function must always return an array.
        logger.error(f"Error fetching weather data: {e}")

    logger.warning("Falling back to synthetic weather data.")
    return generate_synthetic_weather(num_samples=1, window=window)[0]
|
|
|
|
def fetch_and_prepare_weather(
    num_samples: int = NUM_SYNTHETIC_SAMPLES,
    save: bool = True,
) -> np.ndarray:
    """
    Produce the weather dataset used for training.

    The sequences come from generate_synthetic_weather; no API request is
    made here.

    Args:
        num_samples: Number of sequences to generate.
        save: If True, write the array to
            TIMESERIES_DIR / "weather_sequences.npy".

    Returns:
        Array of shape (num_samples, TIMESERIES_WINDOW, 5).
    """
    weather_data = generate_synthetic_weather(num_samples=num_samples)

    # Nothing to persist: hand the array straight back.
    if not save:
        return weather_data

    output_path = TIMESERIES_DIR / "weather_sequences.npy"
    np.save(output_path, weather_data)
    logger.info(f"Saved weather data: {weather_data.shape}")
    return weather_data
|
|
|
|
if __name__ == "__main__":
    # Script entry point: enable INFO logging, build (and by default save)
    # the training weather dataset, then report its shape.
    logging.basicConfig(level=logging.INFO)
    sequences = fetch_and_prepare_weather()
    print(f"Weather data shape: {sequences.shape}")
|
|