multi-hazard-warning-system / src /data /fetch_weather.py
krupal02's picture
Deploy Multi-Hazard Warning System - MTL model for wildfire risk + AQI forecasting
d5b0af1
Raw
History Blame Contribute Delete
6.67 kB
"""
fetch_weather.py — Open-Meteo API fetcher for historical weather data.
Downloads 7-day rolling windows of weather variables aligned to each
image timestamp. Open-Meteo is free and requires no API key.
Falls back to realistic synthetic data if the API is unavailable.
"""
import logging
from datetime import datetime, timedelta
from typing import Optional, Tuple
import numpy as np
import pandas as pd
import requests
from src.training.config import (
OPEN_METEO_URL, WEATHER_RAW_DIR, TIMESERIES_DIR,
TIMESERIES_WINDOW, TIMESERIES_FEATURES, NUM_SYNTHETIC_SAMPLES,
)
logger = logging.getLogger(__name__)
def fetch_weather_data(
    latitude: float = 37.5,
    longitude: float = -120.3,
    start_date: str = "2024-06-01",
    end_date: str = "2024-06-08",
) -> Optional[pd.DataFrame]:
    """
    Fetch daily historical weather data from the Open-Meteo archive API.

    Daily variables requested (output column <- API variable):
        - temperature    <- temperature_2m_mean
        - humidity       <- relative_humidity_2m_mean
        - wind_speed     <- windspeed_10m_max
        - wind_direction <- winddirection_10m_dominant
        - precipitation  <- precipitation_sum

    Gaps in a series are forward-filled, then backward-filled; a variable
    that is missing entirely from the response ends up as a column of 0.0.

    Args:
        latitude: Location latitude.
        longitude: Location longitude.
        start_date: Start date (YYYY-MM-DD).
        end_date: End date (YYYY-MM-DD).

    Returns:
        DataFrame with a "date" column plus the five numeric columns listed
        above (one row per day), or None if the request fails or the
        response cannot be parsed.
    """
    # Output column name -> Open-Meteo daily variable backing it.
    daily_variables = {
        "temperature": "temperature_2m_mean",
        "humidity": "relative_humidity_2m_mean",
        "wind_speed": "windspeed_10m_max",
        "wind_direction": "winddirection_10m_dominant",
        "precipitation": "precipitation_sum",
    }
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "daily": ",".join(daily_variables.values()),
        "timezone": "auto",
    }
    logger.info(f"Fetching Open-Meteo weather for ({latitude}, {longitude}) "
                f"from {start_date} to {end_date}")
    try:
        response = requests.get(OPEN_METEO_URL, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, dict) or "daily" not in data:
            logger.warning("No 'daily' key in Open-Meteo response.")
            return None

        daily = data["daily"]
        times = daily["time"]
        n_days = len(times)
        columns = {"date": pd.to_datetime(times)}
        for column, variable in daily_variables.items():
            # A variable absent from the response becomes an all-missing column.
            columns[column] = daily.get(variable, [None] * n_days)
        df = pd.DataFrame(columns)

        value_columns = list(daily_variables)
        # Coerce to numeric first: an all-None column is object dtype, and
        # filling object columns is deprecated in pandas and would leave
        # object-typed output behind.
        for column in value_columns:
            df[column] = pd.to_numeric(df[column], errors="coerce")

        # Forward fill, then backward fill to cover leading gaps.
        df = df.ffill().bfill()
        # Anything still missing had no observation at all in the window.
        df[value_columns] = df[value_columns].fillna(0.0)

        logger.info(f"Fetched {len(df)} days of weather data.")
        return df
    except requests.RequestException as e:
        logger.error(f"Open-Meteo API request failed: {e}")
        return None
    except (KeyError, ValueError, TypeError, AttributeError) as e:
        # TypeError/AttributeError cover payloads whose "daily"/"time" entries
        # are not the expected mapping/list, keeping the "None on failure"
        # contract instead of leaking the exception to the caller.
        logger.error(f"Error parsing Open-Meteo response: {e}")
        return None
def generate_synthetic_weather(
    num_samples: int = NUM_SYNTHETIC_SAMPLES,
    window: int = TIMESERIES_WINDOW,
    *,
    seed: Optional[int] = None,
) -> np.ndarray:
    """
    Generate synthetic weather time series.

    Each sequence holds five features per day, in this order: temperature,
    humidity, wind speed, wind direction, precipitation. Temperature,
    humidity and wind direction follow a random walk (cumulative sum of
    Gaussian steps), so consecutive days are correlated; wind speed and
    precipitation are drawn independently per day.

    Args:
        num_samples: Number of sequences to generate.
        window: Length of each sequence in days.
        seed: Optional seed for reproducible output. When None (default) the
            values are drawn from NumPy's global random state, so an earlier
            ``np.random.seed(...)`` call still controls the result.

    Returns:
        float32 array of shape (num_samples, window, 5).
    """
    logger.info(f"Generating {num_samples} synthetic weather sequences...")
    # The np.random module and a RandomState instance expose the same
    # uniform/randn/rand calls; the draw order below is kept fixed so the
    # unseeded stream is unchanged for existing callers.
    rng = np.random if seed is None else np.random.RandomState(seed)
    data = np.zeros((num_samples, window, 5), dtype=np.float32)
    for i in range(num_samples):
        # Temperature (°C): base in [20, 40), random walk with step sd 1.5,
        # clipped to [10, 50].
        base_temp = rng.uniform(20, 40)
        temperature = base_temp + np.cumsum(rng.randn(window) * 1.5)
        data[i, :, 0] = np.clip(temperature, 10, 50)

        # Humidity (%): base in [20, 60), random walk with step sd 3,
        # clipped to [5, 95].
        base_hum = rng.uniform(20, 60)
        humidity = base_hum + np.cumsum(rng.randn(window) * 3)
        data[i, :, 1] = np.clip(humidity, 5, 95)

        # Wind speed (km/h): |N(15, 10)| per day, clipped to [0, 60].
        wind_speed = np.abs(rng.randn(window) * 10 + 15)
        data[i, :, 2] = np.clip(wind_speed, 0, 60)

        # Wind direction (°): random walk with step sd 20, wrapped modulo 360.
        base_dir = rng.uniform(0, 360)
        data[i, :, 3] = (base_dir + np.cumsum(rng.randn(window) * 20)) % 360

        # Precipitation (mm): max(0, N(-1, 2)) per day, so most days are dry.
        data[i, :, 4] = np.maximum(0, rng.randn(window) * 2 - 1)
        if rng.rand() > 0.5:  # ~50% of sequences are made completely dry
            data[i, :, 4] = 0
    return data
def fetch_weather_for_location(
    latitude: float,
    longitude: float,
    target_date: str,
    window: int = TIMESERIES_WINDOW,
    save: bool = False,
) -> np.ndarray:
    """
    Fetch a ``window``-day weather window ending at target_date.

    Real data comes from fetch_weather_data(); if that fails, raises, or
    returns fewer than ``window`` rows, a single synthetic sequence from
    generate_synthetic_weather() is returned instead. This function is
    best-effort: fetch and save problems are logged, not raised.

    Args:
        latitude: Location latitude.
        longitude: Location longitude.
        target_date: Last day of the window (YYYY-MM-DD).
        window: Number of days in the window.
        save: If True, also write the fetched (never the synthetic) array to
            WEATHER_RAW_DIR / "weather_<target_date>.npy".

    Returns:
        float32 array of shape (window, 5) with columns temperature,
        humidity, wind_speed, wind_direction, precipitation.
    """
    cols = ["temperature", "humidity", "wind_speed", "wind_direction", "precipitation"]
    arr = None
    try:
        end_dt = datetime.strptime(target_date, "%Y-%m-%d")
        start_dt = end_dt - timedelta(days=window - 1)
        df = fetch_weather_data(
            latitude=latitude,
            longitude=longitude,
            start_date=start_dt.strftime("%Y-%m-%d"),
            end_date=end_dt.strftime("%Y-%m-%d"),
        )
        if df is not None and len(df) >= window:
            arr = df[cols].values[-window:].astype(np.float32)
        elif df is not None:
            logger.warning(
                f"Expected at least {window} days of weather data, got {len(df)}."
            )
    except Exception as e:
        # Deliberate best-effort boundary: any failure here (including a
        # malformed target_date) leads to the synthetic fallback below.
        logger.error(f"Error fetching weather data: {e}")

    if arr is None:
        # Fallback to synthetic
        logger.warning("Falling back to synthetic weather data.")
        return generate_synthetic_weather(num_samples=1, window=window)[0]

    if save:
        # Persisting is optional: a failed write must not throw away the real
        # data that was just fetched, so it is logged and the array is still
        # returned.
        # NOTE(review): the file name depends only on target_date, so two
        # locations saved for the same date overwrite each other — confirm
        # this is intended.
        try:
            np.save(WEATHER_RAW_DIR / f"weather_{target_date}.npy", arr)
        except Exception as e:
            logger.error(f"Could not save weather data for {target_date}: {e}")
    return arr
def fetch_and_prepare_weather(
    num_samples: int = NUM_SYNTHETIC_SAMPLES,
    save: bool = True,
) -> np.ndarray:
    """
    Build the weather dataset used for training.

    The sequences are produced by generate_synthetic_weather(); no API call
    is made here.

    Args:
        num_samples: Number of sequences to generate.
        save: If True, write the array to
            TIMESERIES_DIR / "weather_sequences.npy" and log its shape.

    Returns:
        Array of shape (num_samples, TIMESERIES_WINDOW, 5).
    """
    sequences = generate_synthetic_weather(num_samples=num_samples)
    if not save:
        return sequences

    output_path = TIMESERIES_DIR / "weather_sequences.npy"
    np.save(output_path, sequences)
    logger.info(f"Saved weather data: {sequences.shape}")
    return sequences
if __name__ == "__main__":
    # Script entry point: build (and by default save) the training weather
    # dataset, then report its shape on stdout.
    logging.basicConfig(level=logging.INFO)
    data = fetch_and_prepare_weather()
    shape_report = f"Weather data shape: {data.shape}"
    print(shape_report)