Spaces:
Sleeping
Sleeping
Add detailed logging and live status updates; instrument NOMADS discovery, open_dataset, indexing, fetch timings. Stream status to UI and console.
Browse files
- .DS_Store +0 -0
- app.py +29 -23
- nbm_client.py +19 -1
.DS_Store
CHANGED
|
Binary files a/.DS_Store and b/.DS_Store differ
|
|
|
app.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import re
|
|
|
|
|
|
|
| 3 |
from datetime import timezone
|
| 4 |
|
| 5 |
import gradio as gr
|
|
@@ -16,44 +18,48 @@ INTRO = (
|
|
| 16 |
|
| 17 |
|
| 18 |
def run_forecast(lat, lon, hours=24):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
if lat is None or lon is None:
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
lat =
|
| 23 |
-
lon = float(lon)
|
| 24 |
|
| 25 |
try:
|
|
|
|
| 26 |
dataset_url = get_latest_hourly_dataset_url()
|
|
|
|
| 27 |
except Exception as e:
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
value=(
|
| 31 |
-
f"Failed to locate latest NBM dataset: {e}\n"
|
| 32 |
-
"Try again in a few minutes."
|
| 33 |
-
)
|
| 34 |
-
),
|
| 35 |
-
None,
|
| 36 |
-
)
|
| 37 |
|
| 38 |
try:
|
|
|
|
| 39 |
df, meta = fetch_point_forecast_df(dataset_url, lat, lon, hours=hours)
|
| 40 |
except Exception as e:
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
value=(
|
| 44 |
-
f"Error fetching forecast at {lat:.3f}, {lon:.3f}: {e}\n"
|
| 45 |
-
f"Dataset: {dataset_url}"
|
| 46 |
-
)
|
| 47 |
-
),
|
| 48 |
-
None,
|
| 49 |
-
)
|
| 50 |
|
| 51 |
header = (
|
| 52 |
f"NBM hourly forecast (next {len(df)} hrs) at "
|
| 53 |
f"{meta['lat']:.3f}, {meta['lon']:.3f} (grid: lat[{meta['ilat']}], lon[{meta['ilon']}])\n"
|
| 54 |
-
f"Dataset: {dataset_url}"
|
| 55 |
)
|
| 56 |
-
|
| 57 |
|
| 58 |
|
| 59 |
with gr.Blocks(title="NBM Point Forecast (NOAA NOMADS)") as demo:
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
+
import time
|
| 4 |
+
import logging
|
| 5 |
from datetime import timezone
|
| 6 |
|
| 7 |
import gradio as gr
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def run_forecast(lat, lon, hours=24):
|
| 21 |
+
"""Generator to provide live status updates to the UI and console logs."""
|
| 22 |
+
logging.basicConfig(level=logging.INFO)
|
| 23 |
+
t0 = time.perf_counter()
|
| 24 |
+
|
| 25 |
+
def y(msg, df=None):
|
| 26 |
+
print(msg, flush=True)
|
| 27 |
+
return gr.update(value=msg), df
|
| 28 |
+
|
| 29 |
if lat is None or lon is None:
|
| 30 |
+
yield y("Click map or enter lat/lon.")
|
| 31 |
+
return
|
| 32 |
+
|
| 33 |
+
try:
|
| 34 |
+
lat = float(lat)
|
| 35 |
+
lon = float(lon)
|
| 36 |
+
except Exception:
|
| 37 |
+
yield y("Invalid lat/lon.")
|
| 38 |
+
return
|
| 39 |
|
| 40 |
+
yield y(f"Starting forecast for lat={lat:.5f}, lon={lon:.5f}; hours={hours}")
|
|
|
|
| 41 |
|
| 42 |
try:
|
| 43 |
+
yield y("Discovering latest NBM hourly dataset on NOMADS ...")
|
| 44 |
dataset_url = get_latest_hourly_dataset_url()
|
| 45 |
+
yield y(f"Dataset selected: {dataset_url}")
|
| 46 |
except Exception as e:
|
| 47 |
+
yield y(f"Failed to locate latest NBM dataset: {e}")
|
| 48 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
try:
|
| 51 |
+
yield y("Opening dataset and indexing nearest grid point ...")
|
| 52 |
df, meta = fetch_point_forecast_df(dataset_url, lat, lon, hours=hours)
|
| 53 |
except Exception as e:
|
| 54 |
+
yield y(f"Error fetching forecast at {lat:.3f}, {lon:.3f}: {e}\nDataset: {dataset_url}")
|
| 55 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
header = (
|
| 58 |
f"NBM hourly forecast (next {len(df)} hrs) at "
|
| 59 |
f"{meta['lat']:.3f}, {meta['lon']:.3f} (grid: lat[{meta['ilat']}], lon[{meta['ilon']}])\n"
|
| 60 |
+
f"Dataset: {dataset_url} | total time {time.perf_counter()-t0:.1f}s"
|
| 61 |
)
|
| 62 |
+
yield y(header, df)
|
| 63 |
|
| 64 |
|
| 65 |
with gr.Blocks(title="NBM Point Forecast (NOAA NOMADS)") as demo:
|
nbm_client.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
import re
|
|
|
|
|
|
|
| 2 |
from dataclasses import dataclass
|
| 3 |
from typing import Dict, List, Tuple
|
| 4 |
|
|
@@ -16,6 +18,7 @@ class NBMError(Exception):
|
|
| 16 |
|
| 17 |
|
| 18 |
def _http_get(url: str, timeout: float = 10.0) -> str:
|
|
|
|
| 19 |
r = requests.get(url, timeout=timeout)
|
| 20 |
r.raise_for_status()
|
| 21 |
return r.text
|
|
@@ -28,6 +31,8 @@ def get_latest_hourly_dataset_url(base_root: str = BASE_ROOT) -> str:
|
|
| 28 |
Returns an OPeNDAP dataset base URL like:
|
| 29 |
https://nomads.ncep.noaa.gov/dods/blend/blendYYYYMMDD/blend_1hr_HHz
|
| 30 |
"""
|
|
|
|
|
|
|
| 31 |
root_html = _http_get(base_root)
|
| 32 |
|
| 33 |
# Find available day directories like 'blend20251004'
|
|
@@ -38,8 +43,10 @@ def get_latest_hourly_dataset_url(base_root: str = BASE_ROOT) -> str:
|
|
| 38 |
# Deduplicate and sort descending to prefer newest
|
| 39 |
unique_days = sorted(set(days), reverse=True)
|
| 40 |
|
|
|
|
| 41 |
for day in unique_days:
|
| 42 |
day_url = f"{base_root}blend{day}/"
|
|
|
|
| 43 |
html = _http_get(day_url)
|
| 44 |
# Find datasets named 'blend_1hr_XXz'
|
| 45 |
hours = re.findall(r"blend_1hr_(\d{2})z", html)
|
|
@@ -47,11 +54,13 @@ def get_latest_hourly_dataset_url(base_root: str = BASE_ROOT) -> str:
|
|
| 47 |
continue
|
| 48 |
# pick the highest hour suffix present
|
| 49 |
hour_ints = sorted({int(h) for h in hours}, reverse=True)
|
|
|
|
| 50 |
for hh in hour_ints:
|
| 51 |
ds_url = f"{base_root}blend{day}/blend_1hr_{hh:02d}z"
|
| 52 |
# Lightweight existence check by fetching the DDS header
|
| 53 |
try:
|
| 54 |
_ = _http_get(ds_url + ".dds")
|
|
|
|
| 55 |
return ds_url
|
| 56 |
except Exception:
|
| 57 |
continue
|
|
@@ -111,8 +120,12 @@ def fetch_point_forecast_df(
|
|
| 111 |
Returns (DataFrame, meta_dict)
|
| 112 |
DataFrame columns: time_utc, temp_F, dewpoint_F, wind_mph, gust_mph, cloud_cover_pct, precip_in
|
| 113 |
"""
|
|
|
|
|
|
|
|
|
|
| 114 |
# Open via pydap engine to avoid compiled netcdf dependencies on Spaces
|
| 115 |
ds = xr.open_dataset(dataset_url, engine="pydap", decode_cf=True)
|
|
|
|
| 116 |
|
| 117 |
# Ensure required variables exist; if not, raise a clear error
|
| 118 |
needed = ["tmp2m", "dpt2m", "wind10m", "gust10m", "tcdcsfc", "apcpsfc"]
|
|
@@ -123,17 +136,22 @@ def fetch_point_forecast_df(
|
|
| 123 |
# Fetch coordinate arrays locally to compute nearest grid index
|
| 124 |
lat_vals = ds["lat"].values # 1D
|
| 125 |
lon_vals = ds["lon"].values # 1D
|
|
|
|
| 126 |
|
| 127 |
ilat = _nearest_index(lat_vals, lat)
|
| 128 |
ilon = _nearest_index(lon_vals, lon)
|
| 129 |
|
| 130 |
# Extract a small subset across time at single gridpoint
|
|
|
|
| 131 |
subset = ds[needed].isel(lat=ilat, lon=ilon)
|
| 132 |
|
| 133 |
# Determine how many time steps are available
|
| 134 |
t_index = _to_datetime_index(subset["time"])
|
| 135 |
n = min(len(t_index), max(1, int(hours)))
|
|
|
|
|
|
|
| 136 |
subset = subset.isel(time=slice(0, n)).load()
|
|
|
|
| 137 |
t_index = t_index[:n]
|
| 138 |
|
| 139 |
# Build output arrays
|
|
@@ -167,5 +185,5 @@ def fetch_point_forecast_df(
|
|
| 167 |
"ilon": int(ilon),
|
| 168 |
}
|
| 169 |
|
|
|
|
| 170 |
return df, meta
|
| 171 |
-
|
|
|
|
| 1 |
import re
|
| 2 |
+
import time
|
| 3 |
+
import logging
|
| 4 |
from dataclasses import dataclass
|
| 5 |
from typing import Dict, List, Tuple
|
| 6 |
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def _http_get(url: str, timeout: float = 10.0) -> str:
|
| 21 |
+
logging.info(f"HTTP GET {url}")
|
| 22 |
r = requests.get(url, timeout=timeout)
|
| 23 |
r.raise_for_status()
|
| 24 |
return r.text
|
|
|
|
| 31 |
Returns an OPeNDAP dataset base URL like:
|
| 32 |
https://nomads.ncep.noaa.gov/dods/blend/blendYYYYMMDD/blend_1hr_HHz
|
| 33 |
"""
|
| 34 |
+
logging.info("Discovering latest NBM hourly dataset ...")
|
| 35 |
+
t0 = time.perf_counter()
|
| 36 |
root_html = _http_get(base_root)
|
| 37 |
|
| 38 |
# Find available day directories like 'blend20251004'
|
|
|
|
| 43 |
# Deduplicate and sort descending to prefer newest
|
| 44 |
unique_days = sorted(set(days), reverse=True)
|
| 45 |
|
| 46 |
+
logging.info(f"Found day directories: {unique_days[:3]} ... (total {len(unique_days)})")
|
| 47 |
for day in unique_days:
|
| 48 |
day_url = f"{base_root}blend{day}/"
|
| 49 |
+
logging.info(f"Scanning day {day} at {day_url}")
|
| 50 |
html = _http_get(day_url)
|
| 51 |
# Find datasets named 'blend_1hr_XXz'
|
| 52 |
hours = re.findall(r"blend_1hr_(\d{2})z", html)
|
|
|
|
| 54 |
continue
|
| 55 |
# pick the highest hour suffix present
|
| 56 |
hour_ints = sorted({int(h) for h in hours}, reverse=True)
|
| 57 |
+
logging.info(f"Available hours for {day}: {hour_ints}")
|
| 58 |
for hh in hour_ints:
|
| 59 |
ds_url = f"{base_root}blend{day}/blend_1hr_{hh:02d}z"
|
| 60 |
# Lightweight existence check by fetching the DDS header
|
| 61 |
try:
|
| 62 |
_ = _http_get(ds_url + ".dds")
|
| 63 |
+
logging.info(f"Selected dataset: {ds_url} (discovered in {time.perf_counter()-t0:.2f}s)")
|
| 64 |
return ds_url
|
| 65 |
except Exception:
|
| 66 |
continue
|
|
|
|
| 120 |
Returns (DataFrame, meta_dict)
|
| 121 |
DataFrame columns: time_utc, temp_F, dewpoint_F, wind_mph, gust_mph, cloud_cover_pct, precip_in
|
| 122 |
"""
|
| 123 |
+
logger = logging.getLogger(__name__)
|
| 124 |
+
logger.info(f"Opening dataset via pydap: {dataset_url}")
|
| 125 |
+
t_open = time.perf_counter()
|
| 126 |
# Open via pydap engine to avoid compiled netcdf dependencies on Spaces
|
| 127 |
ds = xr.open_dataset(dataset_url, engine="pydap", decode_cf=True)
|
| 128 |
+
logger.info(f"Dataset opened in {time.perf_counter()-t_open:.2f}s; variables: {list(ds.variables)[:8]} ...")
|
| 129 |
|
| 130 |
# Ensure required variables exist; if not, raise a clear error
|
| 131 |
needed = ["tmp2m", "dpt2m", "wind10m", "gust10m", "tcdcsfc", "apcpsfc"]
|
|
|
|
| 136 |
# Fetch coordinate arrays locally to compute nearest grid index
|
| 137 |
lat_vals = ds["lat"].values # 1D
|
| 138 |
lon_vals = ds["lon"].values # 1D
|
| 139 |
+
logger.info(f"Coords loaded. Lat size={lat_vals.size}, Lon size={lon_vals.size}")
|
| 140 |
|
| 141 |
ilat = _nearest_index(lat_vals, lat)
|
| 142 |
ilon = _nearest_index(lon_vals, lon)
|
| 143 |
|
| 144 |
# Extract a small subset across time at single gridpoint
|
| 145 |
+
logger.info(f"Nearest gridpoint indices: ilat={ilat}, ilon={ilon}; lat={lat_vals[ilat]:.5f}, lon={lon_vals[ilon]:.5f}")
|
| 146 |
subset = ds[needed].isel(lat=ilat, lon=ilon)
|
| 147 |
|
| 148 |
# Determine how many time steps are available
|
| 149 |
t_index = _to_datetime_index(subset["time"])
|
| 150 |
n = min(len(t_index), max(1, int(hours)))
|
| 151 |
+
logger.info(f"Time steps available={len(t_index)}; requesting first {n} hours")
|
| 152 |
+
t_fetch = time.perf_counter()
|
| 153 |
subset = subset.isel(time=slice(0, n)).load()
|
| 154 |
+
logger.info(f"Fetched subset data in {time.perf_counter()-t_fetch:.2f}s")
|
| 155 |
t_index = t_index[:n]
|
| 156 |
|
| 157 |
# Build output arrays
|
|
|
|
| 185 |
"ilon": int(ilon),
|
| 186 |
}
|
| 187 |
|
| 188 |
+
logger.info(f"Built DataFrame rows={len(df)}")
|
| 189 |
return df, meta
|
|
|