Spaces:
Sleeping
Sleeping
Add comprehensive logging throughout the app
Browse files
- Detailed logging for dataset loading
- Log all data operations and transformations
- Track errors with full tracebacks
- Log variable shapes, dimensions, and value ranges
- Help debug data loading issues
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -5,8 +5,18 @@ import numpy as np
|
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
from datetime import datetime, timedelta
|
| 7 |
import warnings
|
|
|
|
|
|
|
|
|
|
| 8 |
warnings.filterwarnings('ignore')
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Catalog configuration with correct zarr store URLs
|
| 11 |
CATALOG = {
|
| 12 |
"NOAA GFS Analysis (Hourly)": {
|
|
@@ -41,51 +51,88 @@ dataset_cache = {}
|
|
| 41 |
|
| 42 |
def load_dataset(dataset_name, use_cache=True):
|
| 43 |
"""Load a dataset from the Dynamical catalog"""
|
|
|
|
|
|
|
| 44 |
if use_cache and dataset_name in dataset_cache:
|
| 45 |
-
|
|
|
|
| 46 |
|
| 47 |
try:
|
| 48 |
url = CATALOG[dataset_name]["url"]
|
|
|
|
|
|
|
| 49 |
ds = xr.open_zarr(url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
if use_cache:
|
| 51 |
dataset_cache[dataset_name] = ds
|
| 52 |
-
|
|
|
|
|
|
|
| 53 |
except Exception as e:
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def create_map_visualization(dataset_name, variable, time_index=0):
|
| 57 |
"""Create an interactive map visualization of the selected variable"""
|
|
|
|
|
|
|
|
|
|
| 58 |
try:
|
| 59 |
-
ds = load_dataset(dataset_name)
|
| 60 |
if ds is None:
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Check if variable exists
|
| 64 |
if variable not in ds.variables:
|
| 65 |
available_vars = list(ds.data_vars)
|
|
|
|
| 66 |
return None, f"Variable '{variable}' not found. Available: {available_vars}"
|
| 67 |
|
|
|
|
|
|
|
| 68 |
# Get the data
|
| 69 |
data_var = ds[variable]
|
|
|
|
| 70 |
|
| 71 |
# Handle time dimension
|
| 72 |
if 'time' in data_var.dims:
|
|
|
|
| 73 |
if time_index >= len(ds.time):
|
| 74 |
time_index = 0
|
| 75 |
data_var = data_var.isel(time=time_index)
|
|
|
|
| 76 |
|
| 77 |
# Handle ensemble dimension if present
|
| 78 |
if 'ensemble' in data_var.dims:
|
|
|
|
| 79 |
data_var = data_var.isel(ensemble=0)
|
| 80 |
|
|
|
|
|
|
|
| 81 |
# Load data into memory (subsample for performance)
|
| 82 |
step = max(1, len(ds.latitude) // 200) # Limit to ~200 points per dimension
|
|
|
|
| 83 |
data_var = data_var.isel(latitude=slice(None, None, step), longitude=slice(None, None, step))
|
|
|
|
| 84 |
data_values = data_var.compute().values
|
|
|
|
| 85 |
|
| 86 |
# Get coordinates
|
| 87 |
lats = ds.latitude.isel(latitude=slice(None, None, step)).values
|
| 88 |
lons = ds.longitude.isel(longitude=slice(None, None, step)).values
|
|
|
|
| 89 |
|
| 90 |
# Create plotly figure
|
| 91 |
fig = go.Figure(data=go.Heatmap(
|
|
@@ -109,34 +156,53 @@ def create_map_visualization(dataset_name, variable, time_index=0):
|
|
| 109 |
hovermode='closest'
|
| 110 |
)
|
| 111 |
|
|
|
|
| 112 |
return fig, f"Successfully loaded {dataset_name}"
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
def get_point_forecast(dataset_name, lat, lon, variable):
|
| 118 |
"""Get forecast data for a specific point"""
|
|
|
|
|
|
|
|
|
|
| 119 |
try:
|
| 120 |
-
ds = load_dataset(dataset_name)
|
| 121 |
if ds is None:
|
| 122 |
-
|
|
|
|
| 123 |
|
| 124 |
if variable not in ds.variables:
|
|
|
|
| 125 |
return None, f"Variable '{variable}' not found in dataset"
|
| 126 |
|
|
|
|
|
|
|
| 127 |
# Find nearest point
|
| 128 |
data_var = ds[variable].sel(latitude=lat, longitude=lon, method='nearest')
|
| 129 |
|
| 130 |
# Handle ensemble dimension
|
| 131 |
if 'ensemble' in data_var.dims:
|
|
|
|
| 132 |
data_var = data_var.isel(ensemble=0)
|
| 133 |
|
|
|
|
|
|
|
| 134 |
# Load data
|
|
|
|
| 135 |
data_values = data_var.compute().values
|
|
|
|
| 136 |
|
| 137 |
# Create time series plot
|
| 138 |
if 'time' in ds[variable].dims:
|
| 139 |
times = pd.to_datetime(ds.time.values)
|
|
|
|
| 140 |
|
| 141 |
fig = go.Figure()
|
| 142 |
fig.add_trace(go.Scatter(
|
|
@@ -160,23 +226,36 @@ def get_point_forecast(dataset_name, lat, lon, variable):
|
|
| 160 |
variable: data_values
|
| 161 |
})
|
| 162 |
|
|
|
|
| 163 |
return fig, df.to_html(index=False)
|
| 164 |
else:
|
|
|
|
| 165 |
return None, f"No time dimension found for {variable}"
|
| 166 |
|
| 167 |
except Exception as e:
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
def update_available_variables(dataset_name):
|
| 171 |
"""Update the variable dropdown based on selected dataset"""
|
|
|
|
|
|
|
| 172 |
try:
|
| 173 |
-
ds = load_dataset(dataset_name, use_cache=False)
|
| 174 |
if ds is None:
|
|
|
|
| 175 |
return gr.Dropdown(choices=CATALOG[dataset_name]["variables"], value=CATALOG[dataset_name]["variables"][0])
|
| 176 |
|
| 177 |
available_vars = list(ds.data_vars)
|
|
|
|
| 178 |
return gr.Dropdown(choices=available_vars, value=available_vars[0] if available_vars else None)
|
| 179 |
-
except:
|
|
|
|
|
|
|
| 180 |
return gr.Dropdown(choices=CATALOG[dataset_name]["variables"], value=CATALOG[dataset_name]["variables"][0])
|
| 181 |
|
| 182 |
# Create Gradio interface
|
|
|
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
from datetime import datetime, timedelta
|
| 7 |
import warnings
|
| 8 |
+
import logging
|
| 9 |
+
import traceback
|
| 10 |
+
|
| 11 |
warnings.filterwarnings('ignore')
|
| 12 |
|
| 13 |
+
# Set up detailed logging
|
| 14 |
+
logging.basicConfig(
|
| 15 |
+
level=logging.INFO,
|
| 16 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 17 |
+
)
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
# Catalog configuration with correct zarr store URLs
|
| 21 |
CATALOG = {
|
| 22 |
"NOAA GFS Analysis (Hourly)": {
|
|
|
|
| 51 |
|
| 52 |
def load_dataset(dataset_name, use_cache=True):
    """Load a dataset from the Dynamical catalog.

    Args:
        dataset_name: Key into the module-level ``CATALOG`` mapping.
        use_cache: When true, return an already opened dataset from
            ``dataset_cache`` if one exists, and store newly opened
            datasets there.

    Returns:
        A ``(dataset, error)`` tuple. On success ``error`` is ``None``;
        on any failure ``dataset`` is ``None`` and ``error`` is a
        human-readable message. This function does not raise.
    """
    logger.info(f"=== Loading dataset: {dataset_name} ===")

    if use_cache and dataset_name in dataset_cache:
        logger.info(f"Dataset found in cache: {dataset_name}")
        return dataset_cache[dataset_name], None

    # Bound before the try body so the error handler can report the URL
    # without indexing CATALOG again (which would raise a second KeyError
    # for an unknown dataset name and escape the handler).
    url = None
    try:
        url = CATALOG[dataset_name]["url"]
        logger.info(f"Opening zarr store at: {url}")

        ds = xr.open_zarr(url)
        logger.info("Successfully opened zarr store")
        # ``sizes`` is the stable name -> length mapping; the mapping
        # behaviour of ``Dataset.dims`` is documented as going away.
        logger.info(f"Dataset dimensions: {dict(ds.sizes)}")
        logger.info(f"Dataset variables: {list(ds.data_vars)}")
        logger.info(f"Dataset coordinates: {list(ds.coords)}")

        if use_cache:
            dataset_cache[dataset_name] = ds
            logger.info(f"Dataset cached: {dataset_name}")

        return ds, None
    except Exception as e:
        error_msg = f"Error loading dataset: {str(e)}"
        logger.error(f"=== ERROR loading {dataset_name} ===")
        logger.error(f"URL: {url}")
        logger.error(f"Exception type: {type(e).__name__}")
        logger.error(f"Exception message: {str(e)}")
        logger.error(f"Traceback:\n{traceback.format_exc()}")
        return None, error_msg
|
| 83 |
|
| 84 |
def create_map_visualization(dataset_name, variable, time_index=0):
|
| 85 |
"""Create an interactive map visualization of the selected variable"""
|
| 86 |
+
logger.info(f"=== Creating map visualization ===")
|
| 87 |
+
logger.info(f"Dataset: {dataset_name}, Variable: {variable}, Time index: {time_index}")
|
| 88 |
+
|
| 89 |
try:
|
| 90 |
+
ds, error = load_dataset(dataset_name)
|
| 91 |
if ds is None:
|
| 92 |
+
logger.error(f"Dataset loading returned None: {error}")
|
| 93 |
+
return None, f"Error loading dataset: {dataset_name}\n{error}"
|
| 94 |
+
|
| 95 |
+
logger.info(f"Dataset loaded successfully")
|
| 96 |
|
| 97 |
# Check if variable exists
|
| 98 |
if variable not in ds.variables:
|
| 99 |
available_vars = list(ds.data_vars)
|
| 100 |
+
logger.error(f"Variable '{variable}' not found. Available: {available_vars}")
|
| 101 |
return None, f"Variable '{variable}' not found. Available: {available_vars}"
|
| 102 |
|
| 103 |
+
logger.info(f"Variable '{variable}' found in dataset")
|
| 104 |
+
|
| 105 |
# Get the data
|
| 106 |
data_var = ds[variable]
|
| 107 |
+
logger.info(f"Variable shape: {data_var.shape}, dims: {data_var.dims}")
|
| 108 |
|
| 109 |
# Handle time dimension
|
| 110 |
if 'time' in data_var.dims:
|
| 111 |
+
logger.info(f"Time dimension found, length: {len(ds.time)}")
|
| 112 |
if time_index >= len(ds.time):
|
| 113 |
time_index = 0
|
| 114 |
data_var = data_var.isel(time=time_index)
|
| 115 |
+
logger.info(f"Selected time index: {time_index}")
|
| 116 |
|
| 117 |
# Handle ensemble dimension if present
|
| 118 |
if 'ensemble' in data_var.dims:
|
| 119 |
+
logger.info(f"Ensemble dimension found, selecting ensemble 0")
|
| 120 |
data_var = data_var.isel(ensemble=0)
|
| 121 |
|
| 122 |
+
logger.info(f"Data variable shape after slicing: {data_var.shape}")
|
| 123 |
+
|
| 124 |
# Load data into memory (subsample for performance)
|
| 125 |
step = max(1, len(ds.latitude) // 200) # Limit to ~200 points per dimension
|
| 126 |
+
logger.info(f"Subsampling with step: {step}")
|
| 127 |
data_var = data_var.isel(latitude=slice(None, None, step), longitude=slice(None, None, step))
|
| 128 |
+
logger.info(f"Computing data values...")
|
| 129 |
data_values = data_var.compute().values
|
| 130 |
+
logger.info(f"Data values shape: {data_values.shape}, min: {data_values.min()}, max: {data_values.max()}")
|
| 131 |
|
| 132 |
# Get coordinates
|
| 133 |
lats = ds.latitude.isel(latitude=slice(None, None, step)).values
|
| 134 |
lons = ds.longitude.isel(longitude=slice(None, None, step)).values
|
| 135 |
+
logger.info(f"Lat range: [{lats.min()}, {lats.max()}], Lon range: [{lons.min()}, {lons.max()}]")
|
| 136 |
|
| 137 |
# Create plotly figure
|
| 138 |
fig = go.Figure(data=go.Heatmap(
|
|
|
|
| 156 |
hovermode='closest'
|
| 157 |
)
|
| 158 |
|
| 159 |
+
logger.info(f"Map visualization created successfully")
|
| 160 |
return fig, f"Successfully loaded {dataset_name}"
|
| 161 |
|
| 162 |
except Exception as e:
|
| 163 |
+
error_msg = f"Error creating visualization: {str(e)}"
|
| 164 |
+
logger.error(f"=== ERROR creating visualization ===")
|
| 165 |
+
logger.error(f"Exception type: {type(e).__name__}")
|
| 166 |
+
logger.error(f"Exception message: {str(e)}")
|
| 167 |
+
logger.error(f"Traceback:\n{traceback.format_exc()}")
|
| 168 |
+
return None, error_msg
|
| 169 |
|
| 170 |
def get_point_forecast(dataset_name, lat, lon, variable):
|
| 171 |
"""Get forecast data for a specific point"""
|
| 172 |
+
logger.info(f"=== Getting point forecast ===")
|
| 173 |
+
logger.info(f"Dataset: {dataset_name}, Lat: {lat}, Lon: {lon}, Variable: {variable}")
|
| 174 |
+
|
| 175 |
try:
|
| 176 |
+
ds, error = load_dataset(dataset_name)
|
| 177 |
if ds is None:
|
| 178 |
+
logger.error(f"Dataset loading failed: {error}")
|
| 179 |
+
return None, f"Error loading dataset: {error}"
|
| 180 |
|
| 181 |
if variable not in ds.variables:
|
| 182 |
+
logger.error(f"Variable '{variable}' not found in dataset")
|
| 183 |
return None, f"Variable '{variable}' not found in dataset"
|
| 184 |
|
| 185 |
+
logger.info(f"Selecting nearest point to ({lat}, {lon})")
|
| 186 |
+
|
| 187 |
# Find nearest point
|
| 188 |
data_var = ds[variable].sel(latitude=lat, longitude=lon, method='nearest')
|
| 189 |
|
| 190 |
# Handle ensemble dimension
|
| 191 |
if 'ensemble' in data_var.dims:
|
| 192 |
+
logger.info(f"Handling ensemble dimension")
|
| 193 |
data_var = data_var.isel(ensemble=0)
|
| 194 |
|
| 195 |
+
logger.info(f"Point data shape: {data_var.shape}, dims: {data_var.dims}")
|
| 196 |
+
|
| 197 |
# Load data
|
| 198 |
+
logger.info(f"Computing point data values...")
|
| 199 |
data_values = data_var.compute().values
|
| 200 |
+
logger.info(f"Point data computed, shape: {data_values.shape}")
|
| 201 |
|
| 202 |
# Create time series plot
|
| 203 |
if 'time' in ds[variable].dims:
|
| 204 |
times = pd.to_datetime(ds.time.values)
|
| 205 |
+
logger.info(f"Creating time series plot with {len(times)} time steps")
|
| 206 |
|
| 207 |
fig = go.Figure()
|
| 208 |
fig.add_trace(go.Scatter(
|
|
|
|
| 226 |
variable: data_values
|
| 227 |
})
|
| 228 |
|
| 229 |
+
logger.info(f"Point forecast created successfully")
|
| 230 |
return fig, df.to_html(index=False)
|
| 231 |
else:
|
| 232 |
+
logger.warning(f"No time dimension found for {variable}")
|
| 233 |
return None, f"No time dimension found for {variable}"
|
| 234 |
|
| 235 |
except Exception as e:
|
| 236 |
+
error_msg = f"Error getting point forecast: {str(e)}"
|
| 237 |
+
logger.error(f"=== ERROR getting point forecast ===")
|
| 238 |
+
logger.error(f"Exception type: {type(e).__name__}")
|
| 239 |
+
logger.error(f"Exception message: {str(e)}")
|
| 240 |
+
logger.error(f"Traceback:\n{traceback.format_exc()}")
|
| 241 |
+
return None, error_msg
|
| 242 |
|
| 243 |
def update_available_variables(dataset_name):
    """Update the variable dropdown based on selected dataset.

    Opens the dataset (bypassing the cache) and offers its data variables
    as dropdown choices. If the dataset cannot be loaded, or anything else
    goes wrong, falls back to the variable list configured for the dataset
    in ``CATALOG``.

    Args:
        dataset_name: Key into the module-level ``CATALOG`` mapping.

    Returns:
        A ``gr.Dropdown`` with the choices and a default value, or with
        ``value=None`` when no choices are available.
    """
    logger.info(f"=== Updating available variables for {dataset_name} ===")

    def _catalog_defaults():
        # Safe lookups: an unknown dataset name or an empty variable list
        # must not raise from inside the fallback path.
        defaults = CATALOG.get(dataset_name, {}).get("variables", [])
        return gr.Dropdown(choices=defaults, value=defaults[0] if defaults else None)

    try:
        ds, error = load_dataset(dataset_name, use_cache=False)
        if ds is None:
            logger.warning(f"Could not load dataset, using default variables: {error}")
            return _catalog_defaults()

        available_vars = list(ds.data_vars)
        logger.info(f"Available variables: {available_vars}")
        return gr.Dropdown(choices=available_vars, value=available_vars[0] if available_vars else None)
    except Exception as e:
        logger.error(f"Error updating variables: {str(e)}")
        logger.error(f"Traceback:\n{traceback.format_exc()}")
        return _catalog_defaults()
|
| 260 |
|
| 261 |
# Create Gradio interface
|