nakas committed on
Commit
fc56e93
·
1 Parent(s): 2c22925

Add detailed logging and live status updates; instrument NOMADS discovery, open_dataset, indexing, fetch timings. Stream status to UI and console.

Browse files
Files changed (3) hide show
  1. .DS_Store +0 -0
  2. app.py +29 -23
  3. nbm_client.py +19 -1
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
2
  import re
 
 
3
  from datetime import timezone
4
 
5
  import gradio as gr
@@ -16,44 +18,48 @@ INTRO = (
16
 
17
 
18
  def run_forecast(lat, lon, hours=24):
 
 
 
 
 
 
 
 
19
  if lat is None or lon is None:
20
- return gr.update(value="Click map or enter lat/lon."), None
 
 
 
 
 
 
 
 
21
 
22
- lat = float(lat)
23
- lon = float(lon)
24
 
25
  try:
 
26
  dataset_url = get_latest_hourly_dataset_url()
 
27
  except Exception as e:
28
- return (
29
- gr.update(
30
- value=(
31
- f"Failed to locate latest NBM dataset: {e}\n"
32
- "Try again in a few minutes."
33
- )
34
- ),
35
- None,
36
- )
37
 
38
  try:
 
39
  df, meta = fetch_point_forecast_df(dataset_url, lat, lon, hours=hours)
40
  except Exception as e:
41
- return (
42
- gr.update(
43
- value=(
44
- f"Error fetching forecast at {lat:.3f}, {lon:.3f}: {e}\n"
45
- f"Dataset: {dataset_url}"
46
- )
47
- ),
48
- None,
49
- )
50
 
51
  header = (
52
  f"NBM hourly forecast (next {len(df)} hrs) at "
53
  f"{meta['lat']:.3f}, {meta['lon']:.3f} (grid: lat[{meta['ilat']}], lon[{meta['ilon']}])\n"
54
- f"Dataset: {dataset_url}"
55
  )
56
- return header, df
57
 
58
 
59
  with gr.Blocks(title="NBM Point Forecast (NOAA NOMADS)") as demo:
 
1
  import os
2
  import re
3
+ import time
4
+ import logging
5
  from datetime import timezone
6
 
7
  import gradio as gr
 
18
 
19
 
20
def run_forecast(lat, lon, hours=24):
    """Yield live (status_update, dataframe) tuples for the Gradio UI.

    Runs as a generator so the UI can stream intermediate status messages
    while the latest NBM dataset is discovered on NOMADS and the point
    forecast is fetched. Every status message is also printed to the
    console for server-side visibility.

    Parameters:
        lat, lon: point coordinates; anything ``float()`` accepts, or None.
        hours: number of hourly steps to request (default 24).

    Yields:
        ``(gr.update(value=msg), df)`` tuples; ``df`` is None for status
        updates and a DataFrame on the final successful yield.
    """
    # Configure root logging once. basicConfig is already a no-op when
    # handlers exist, but guarding makes the one-shot intent explicit and
    # avoids a global-config call on every request.
    if not logging.getLogger().handlers:
        logging.basicConfig(level=logging.INFO)
    t0 = time.perf_counter()

    def y(msg, df=None):
        # Mirror each status message to the console before sending it to the UI.
        print(msg, flush=True)
        return gr.update(value=msg), df

    if lat is None or lon is None:
        yield y("Click map or enter lat/lon.")
        return

    try:
        lat = float(lat)
        lon = float(lon)
    except (TypeError, ValueError):
        # Narrowed from a bare Exception: only conversion failures are expected here.
        yield y("Invalid lat/lon.")
        return

    yield y(f"Starting forecast for lat={lat:.5f}, lon={lon:.5f}; hours={hours}")

    try:
        yield y("Discovering latest NBM hourly dataset on NOMADS ...")
        dataset_url = get_latest_hourly_dataset_url()
        yield y(f"Dataset selected: {dataset_url}")
    except Exception as e:
        # Discovery hits the network; surface any failure to the UI and stop.
        yield y(f"Failed to locate latest NBM dataset: {e}")
        return

    try:
        yield y("Opening dataset and indexing nearest grid point ...")
        df, meta = fetch_point_forecast_df(dataset_url, lat, lon, hours=hours)
    except Exception as e:
        # Include the dataset URL so the failure can be reproduced manually.
        yield y(f"Error fetching forecast at {lat:.3f}, {lon:.3f}: {e}\nDataset: {dataset_url}")
        return

    header = (
        f"NBM hourly forecast (next {len(df)} hrs) at "
        f"{meta['lat']:.3f}, {meta['lon']:.3f} (grid: lat[{meta['ilat']}], lon[{meta['ilon']}])\n"
        f"Dataset: {dataset_url} | total time {time.perf_counter()-t0:.1f}s"
    )
    yield y(header, df)
63
 
64
 
65
  with gr.Blocks(title="NBM Point Forecast (NOAA NOMADS)") as demo:
nbm_client.py CHANGED
@@ -1,4 +1,6 @@
1
  import re
 
 
2
  from dataclasses import dataclass
3
  from typing import Dict, List, Tuple
4
 
@@ -16,6 +18,7 @@ class NBMError(Exception):
16
 
17
 
18
  def _http_get(url: str, timeout: float = 10.0) -> str:
 
19
  r = requests.get(url, timeout=timeout)
20
  r.raise_for_status()
21
  return r.text
@@ -28,6 +31,8 @@ def get_latest_hourly_dataset_url(base_root: str = BASE_ROOT) -> str:
28
  Returns an OPeNDAP dataset base URL like:
29
  https://nomads.ncep.noaa.gov/dods/blend/blendYYYYMMDD/blend_1hr_HHz
30
  """
 
 
31
  root_html = _http_get(base_root)
32
 
33
  # Find available day directories like 'blend20251004'
@@ -38,8 +43,10 @@ def get_latest_hourly_dataset_url(base_root: str = BASE_ROOT) -> str:
38
  # Deduplicate and sort descending to prefer newest
39
  unique_days = sorted(set(days), reverse=True)
40
 
 
41
  for day in unique_days:
42
  day_url = f"{base_root}blend{day}/"
 
43
  html = _http_get(day_url)
44
  # Find datasets named 'blend_1hr_XXz'
45
  hours = re.findall(r"blend_1hr_(\d{2})z", html)
@@ -47,11 +54,13 @@ def get_latest_hourly_dataset_url(base_root: str = BASE_ROOT) -> str:
47
  continue
48
  # pick the highest hour suffix present
49
  hour_ints = sorted({int(h) for h in hours}, reverse=True)
 
50
  for hh in hour_ints:
51
  ds_url = f"{base_root}blend{day}/blend_1hr_{hh:02d}z"
52
  # Lightweight existence check by fetching the DDS header
53
  try:
54
  _ = _http_get(ds_url + ".dds")
 
55
  return ds_url
56
  except Exception:
57
  continue
@@ -111,8 +120,12 @@ def fetch_point_forecast_df(
111
  Returns (DataFrame, meta_dict)
112
  DataFrame columns: time_utc, temp_F, dewpoint_F, wind_mph, gust_mph, cloud_cover_pct, precip_in
113
  """
 
 
 
114
  # Open via pydap engine to avoid compiled netcdf dependencies on Spaces
115
  ds = xr.open_dataset(dataset_url, engine="pydap", decode_cf=True)
 
116
 
117
  # Ensure required variables exist; if not, raise a clear error
118
  needed = ["tmp2m", "dpt2m", "wind10m", "gust10m", "tcdcsfc", "apcpsfc"]
@@ -123,17 +136,22 @@ def fetch_point_forecast_df(
123
  # Fetch coordinate arrays locally to compute nearest grid index
124
  lat_vals = ds["lat"].values # 1D
125
  lon_vals = ds["lon"].values # 1D
 
126
 
127
  ilat = _nearest_index(lat_vals, lat)
128
  ilon = _nearest_index(lon_vals, lon)
129
 
130
  # Extract a small subset across time at single gridpoint
 
131
  subset = ds[needed].isel(lat=ilat, lon=ilon)
132
 
133
  # Determine how many time steps are available
134
  t_index = _to_datetime_index(subset["time"])
135
  n = min(len(t_index), max(1, int(hours)))
 
 
136
  subset = subset.isel(time=slice(0, n)).load()
 
137
  t_index = t_index[:n]
138
 
139
  # Build output arrays
@@ -167,5 +185,5 @@ def fetch_point_forecast_df(
167
  "ilon": int(ilon),
168
  }
169
 
 
170
  return df, meta
171
-
 
1
  import re
2
+ import time
3
+ import logging
4
  from dataclasses import dataclass
5
  from typing import Dict, List, Tuple
6
 
 
18
 
19
 
20
def _http_get(url: str, timeout: float = 10.0) -> str:
    """Fetch *url* over HTTP GET and return the response body as text.

    Raises:
        requests.HTTPError: for non-2xx responses (via raise_for_status).
        requests.Timeout / requests.RequestException: on network failures.
    """
    # Lazy %-style args defer string formatting until INFO is actually enabled.
    logging.info("HTTP GET %s", url)
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    return r.text
 
31
  Returns an OPeNDAP dataset base URL like:
32
  https://nomads.ncep.noaa.gov/dods/blend/blendYYYYMMDD/blend_1hr_HHz
33
  """
34
+ logging.info("Discovering latest NBM hourly dataset ...")
35
+ t0 = time.perf_counter()
36
  root_html = _http_get(base_root)
37
 
38
  # Find available day directories like 'blend20251004'
 
43
  # Deduplicate and sort descending to prefer newest
44
  unique_days = sorted(set(days), reverse=True)
45
 
46
+ logging.info(f"Found day directories: {unique_days[:3]} ... (total {len(unique_days)})")
47
  for day in unique_days:
48
  day_url = f"{base_root}blend{day}/"
49
+ logging.info(f"Scanning day {day} at {day_url}")
50
  html = _http_get(day_url)
51
  # Find datasets named 'blend_1hr_XXz'
52
  hours = re.findall(r"blend_1hr_(\d{2})z", html)
 
54
  continue
55
  # pick the highest hour suffix present
56
  hour_ints = sorted({int(h) for h in hours}, reverse=True)
57
+ logging.info(f"Available hours for {day}: {hour_ints}")
58
  for hh in hour_ints:
59
  ds_url = f"{base_root}blend{day}/blend_1hr_{hh:02d}z"
60
  # Lightweight existence check by fetching the DDS header
61
  try:
62
  _ = _http_get(ds_url + ".dds")
63
+ logging.info(f"Selected dataset: {ds_url} (discovered in {time.perf_counter()-t0:.2f}s)")
64
  return ds_url
65
  except Exception:
66
  continue
 
120
  Returns (DataFrame, meta_dict)
121
  DataFrame columns: time_utc, temp_F, dewpoint_F, wind_mph, gust_mph, cloud_cover_pct, precip_in
122
  """
123
+ logger = logging.getLogger(__name__)
124
+ logger.info(f"Opening dataset via pydap: {dataset_url}")
125
+ t_open = time.perf_counter()
126
  # Open via pydap engine to avoid compiled netcdf dependencies on Spaces
127
  ds = xr.open_dataset(dataset_url, engine="pydap", decode_cf=True)
128
+ logger.info(f"Dataset opened in {time.perf_counter()-t_open:.2f}s; variables: {list(ds.variables)[:8]} ...")
129
 
130
  # Ensure required variables exist; if not, raise a clear error
131
  needed = ["tmp2m", "dpt2m", "wind10m", "gust10m", "tcdcsfc", "apcpsfc"]
 
136
  # Fetch coordinate arrays locally to compute nearest grid index
137
  lat_vals = ds["lat"].values # 1D
138
  lon_vals = ds["lon"].values # 1D
139
+ logger.info(f"Coords loaded. Lat size={lat_vals.size}, Lon size={lon_vals.size}")
140
 
141
  ilat = _nearest_index(lat_vals, lat)
142
  ilon = _nearest_index(lon_vals, lon)
143
 
144
  # Extract a small subset across time at single gridpoint
145
+ logger.info(f"Nearest gridpoint indices: ilat={ilat}, ilon={ilon}; lat={lat_vals[ilat]:.5f}, lon={lon_vals[ilon]:.5f}")
146
  subset = ds[needed].isel(lat=ilat, lon=ilon)
147
 
148
  # Determine how many time steps are available
149
  t_index = _to_datetime_index(subset["time"])
150
  n = min(len(t_index), max(1, int(hours)))
151
+ logger.info(f"Time steps available={len(t_index)}; requesting first {n} hours")
152
+ t_fetch = time.perf_counter()
153
  subset = subset.isel(time=slice(0, n)).load()
154
+ logger.info(f"Fetched subset data in {time.perf_counter()-t_fetch:.2f}s")
155
  t_index = t_index[:n]
156
 
157
  # Build output arrays
 
185
  "ilon": int(ilon),
186
  }
187
 
188
+ logger.info(f"Built DataFrame rows={len(df)}")
189
  return df, meta