File size: 16,525 Bytes
2b77e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad5cfb
 
 
 
2b77e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad5cfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b77e9c
 
1ad5cfb
2b77e9c
 
1ad5cfb
 
2b77e9c
1ad5cfb
 
2b77e9c
1ad5cfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b77e9c
1ad5cfb
 
 
 
 
 
 
 
 
2b77e9c
1ad5cfb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b77e9c
 
 
 
 
 
 
 
 
 
1ad5cfb
 
 
2b77e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad5cfb
 
 
 
 
 
 
 
 
 
2b77e9c
 
1ad5cfb
 
2b77e9c
1ad5cfb
2b77e9c
 
 
 
1ad5cfb
2b77e9c
 
 
 
1ad5cfb
 
 
 
 
 
 
 
2b77e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad5cfb
 
 
 
 
 
2b77e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad5cfb
 
 
 
2b77e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
import gradio as gr
import folium
from folium import plugins
import pandas as pd
import numpy as np
import requests
import xarray as xr
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import io
import base64
from huggingface_hub import hf_hub_download
import tempfile
import os
import ocf_blosc2
from scipy.spatial import cKDTree
import warnings
warnings.filterwarnings('ignore')

def create_map():
    """Build the Folium map that is embedded in the UI.

    Returns:
        folium.Map: An OpenStreetMap-tiled map centred on Europe
        (50.0, 10.0) at zoom level 4 with a click-to-place-marker child.
    """
    # Marker-on-click behaviour, attached to the map below
    click_marker = folium.ClickForMarker(popup="Click to select location")

    europe_map = folium.Map(location=[50.0, 10.0], zoom_start=4, tiles='OpenStreetMap')
    europe_map.add_child(click_marker)
    return europe_map

def _latlon_to_unit_vectors(lat_deg, lon_deg):
    """Convert latitude/longitude in degrees to 3-D unit vectors (last axis = x, y, z)."""
    lat = np.radians(lat_deg)
    lon = np.radians(lon_deg)
    cos_lat = np.cos(lat)
    return np.stack(
        [cos_lat * np.cos(lon), cos_lat * np.sin(lon), np.sin(lat)],
        axis=-1,
    )


def find_nearest_grid_point(target_lat, target_lon, grid_lats, grid_lons):
    """
    Find the grid point closest (great-circle sense) to the target coordinates.

    All points are projected onto the unit sphere before the KDTree search.
    Straight-line (chord) distance between unit vectors grows monotonically
    with great-circle distance, so the nearest neighbour in 3-D is also the
    nearest point on the sphere. Searching raw (lat, lon) pairs instead would
    treat 179°E and 179°W as far apart and would overweight longitude
    differences near the poles.

    Args:
        target_lat: Target latitude in degrees.
        target_lon: Target longitude in degrees.
        grid_lats: Array of grid latitudes (any shape); treated as degrees.
        grid_lons: Array of grid longitudes with the same number of elements.

    Returns:
        Tuple of indices (as produced by ``np.unravel_index``) addressing the
        nearest point in an array shaped like ``grid_lats``.

    Raises:
        ValueError: If the grid is empty or the two arrays differ in size.
    """
    grid_lats = np.asarray(grid_lats, dtype=float)
    grid_lons = np.asarray(grid_lons, dtype=float)
    if grid_lats.size == 0:
        raise ValueError("Cannot search an empty grid")
    if grid_lats.size != grid_lons.size:
        raise ValueError("grid_lats and grid_lons must contain the same number of points")

    grid_xyz = _latlon_to_unit_vectors(grid_lats.ravel(), grid_lons.ravel())
    target_xyz = _latlon_to_unit_vectors(float(target_lat), float(target_lon))

    try:
        _, flat_index = cKDTree(grid_xyz).query(target_xyz)
    except (ValueError, MemoryError):
        # Best-effort fallback: brute-force search on the same chord metric,
        # skipping NaN entries.
        squared_chord = np.sum((grid_xyz - target_xyz) ** 2, axis=1)
        flat_index = np.nanargmin(squared_chord)

    # Convert the flat position back to indices in the original grid shape
    return np.unravel_index(flat_index, grid_lats.shape)

def get_latest_available_file():
    """
    Get the most recent available forecast file.

    Walks backwards from the current UTC time over the last five days and,
    for each day, over the 18/12/06/00 runs (newest first), returning the
    first file that can be downloaded from the Hugging Face dataset repo.

    Returns:
        tuple: ``(file_path, run_time, hour)`` where ``file_path`` is the
        local path returned by ``hf_hub_download``, ``run_time`` is a naive
        UTC ``datetime`` set to the run's initialisation hour, and ``hour``
        is that hour as a two-digit string.

    Raises:
        RuntimeError: If none of the candidate files could be downloaded.
            The last download error is attached as the cause.
    """
    # Function-scope import: the module only imports ``datetime`` and
    # ``timedelta`` at file level.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated; take an aware UTC time and strip
    # the tzinfo so the returned value stays a naive datetime as before.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    last_error = None

    # Try the last few days to find available data
    for days_back in range(5):
        check_date = now - timedelta(days=days_back)
        date_str = check_date.strftime("%Y%m%d")

        # Try the different forecast hours, newest first
        for hour in ('18', '12', '06', '00'):
            run_time = check_date.replace(hour=int(hour), minute=0, second=0, microsecond=0)
            if run_time > now:
                # This run has not started yet; do not waste a request on it
                continue

            filename = f"data/{check_date.year}/{check_date.month}/{check_date.day}/{date_str}_{hour}.zarr.zip"
            try:
                file_path = hf_hub_download(
                    repo_id="openclimatefix/dwd-icon-global",
                    filename=filename,
                    repo_type="dataset",
                    cache_dir="./cache"
                )
            except Exception as err:
                # Deliberately broad: a missing file and a network error both
                # mean "try the next candidate". Keep the error for reporting.
                last_error = err
                continue

            # Return the run's initialisation time, not the wall-clock time,
            # so labels and derived timestamps line up with the forecast run.
            return file_path, run_time, hour

    raise RuntimeError("No recent forecast data available") from last_error

def _synthetic_forecast(lat, lon, error_msg):
    """Build the deterministic placeholder forecast returned when real data cannot be loaded."""
    forecast_days = 4
    hours = np.arange(0, forecast_days * 24, 6)

    # Private generator seeded from the coordinates. The modulo keeps the seed
    # inside [0, 2**32) so negative coordinate sums no longer raise, and the
    # global NumPy random state is left untouched.
    seed = int(lat * 100 + lon * 100) % (2 ** 32)
    rng = np.random.RandomState(seed)

    current_date = datetime.now()
    timestamps = [current_date + timedelta(hours=int(h)) for h in hours]
    temperature = 15 + 10 * np.sin(hours * np.pi / 12) + rng.normal(0, 2, len(hours))
    humidity = 60 + 20 * np.sin(hours * np.pi / 24 + np.pi/4) + rng.normal(0, 5, len(hours))
    wind_speed = 5 + 3 * np.sin(hours * np.pi / 18) + rng.normal(0, 1, len(hours))

    return {
        'timestamps': timestamps,
        'temperature': temperature,
        'humidity': humidity,
        'wind_speed': wind_speed,
        'lat': lat,
        'lon': lon,
        'error': error_msg,
        'forecast_date': 'Fallback synthetic data'
    }


def get_forecast_data(lat, lon, forecast_hour="00"):
    """
    Fetch real forecast data for given coordinates from DWD ICON Global dataset.

    Args:
        lat: Latitude of the requested location.
        lon: Longitude of the requested location.
        forecast_hour: Currently unused; kept for interface compatibility.

    Returns:
        dict: Always contains ``timestamps``, ``temperature``, ``humidity``,
        ``wind_speed``, ``lat``, ``lon`` and ``forecast_date``. Real-data
        results also carry ``nearest_grid_lat``/``nearest_grid_lon`` and,
        when available, ``pressure`` and ``precipitation``. If anything
        fails, a synthetic forecast is returned instead, with an ``error``
        key describing the failure.
    """
    try:
        # Get the latest available file
        file_path, forecast_date, _used_hour = get_latest_available_file()

        # Open the dataset in a context manager so the store is closed again
        with xr.open_zarr(file_path) as ds:
            # Get coordinate information
            grid_lats = None
            grid_lons = None
            if 'clon' in ds.coords and 'clat' in ds.coords:
                grid_lons = ds.clon.values
                grid_lats = ds.clat.values
            elif 'longitude' in ds.coords and 'latitude' in ds.coords:
                grid_lons = ds.longitude.values
                grid_lats = ds.latitude.values
            else:
                # Try to find coordinate variables by name
                lon_names = [var for var in ds.variables if 'lon' in var.lower()]
                if lon_names:
                    grid_lons = ds[lon_names[0]].values
                lat_names = [var for var in ds.variables if 'lat' in var.lower()]
                if lat_names:
                    grid_lats = ds[lat_names[0]].values

            if grid_lats is None or grid_lons is None:
                raise KeyError("No latitude/longitude coordinates found in dataset")

            # Find nearest grid point
            nearest_idx = find_nearest_grid_point(lat, lon, grid_lats, grid_lons)

            # Candidate variable names per quantity; the first usable one wins
            var_mapping = {
                'temperature': ['t_2m', 't_s', 'temp_2m', 'temperature_2m', 't2m'],
                'humidity': ['relhum_2m', 'rh_2m', 'humidity_2m', 'rh2m', 'qv_2m'],
                'wind_u': ['u_10m', 'u10m', 'wind_u_10m', 'u10'],
                'wind_v': ['v_10m', 'v10m', 'wind_v_10m', 'v10'],
                'pressure': ['pmsl', 'msl', 'pressure_msl', 'ps'],
                'precipitation': ['tot_prec', 'tp', 'precipitation', 'rain_gsp']
            }

            extracted_vars = {}

            for var_type, possible_names in var_mapping.items():
                for name in possible_names:
                    if name not in ds.variables:
                        continue
                    try:
                        data = ds[name]
                        # Extract the series for the nearest point
                        if len(data.dims) == 3:  # assumed (time, lat, lon)
                            values = data.isel({data.dims[1]: nearest_idx[0], data.dims[2]: nearest_idx[1]})
                        elif len(data.dims) == 2:  # assumed (time, spatial)
                            flat_idx = np.ravel_multi_index(nearest_idx, grid_lats.shape)
                            values = data.isel({data.dims[1]: flat_idx})
                        else:
                            continue

                        extracted_vars[var_type] = values.values
                        break
                    except Exception as exc:
                        # Best effort: an unreadable candidate must not abort
                        # the whole request, so try the next candidate name.
                        print(f"Skipping variable {name}: {exc}")  # For debugging
                        continue

            # Get time coordinates
            if 'time' in ds.coords:
                timestamps = pd.to_datetime(ds.time.values).to_pydatetime()
            elif 'valid_time' in ds.coords:
                timestamps = pd.to_datetime(ds.valid_time.values).to_pydatetime()
            else:
                # Generate timestamps assuming 3-hourly steps from the run time
                if not extracted_vars:
                    raise ValueError("No forecast variables or time coordinate found in dataset")
                forecast_hours = len(next(iter(extracted_vars.values())))
                timestamps = [forecast_date + timedelta(hours=i*3) for i in range(forecast_hours)]

        # Convert temperature from Kelvin to Celsius if needed
        if 'temperature' in extracted_vars:
            temp_vals = extracted_vars['temperature']
            if np.mean(temp_vals) > 200:  # Likely in Kelvin
                extracted_vars['temperature'] = temp_vals - 273.15

        # Calculate wind speed from u and v components
        if 'wind_u' in extracted_vars and 'wind_v' in extracted_vars:
            wind_speed = np.sqrt(extracted_vars['wind_u']**2 + extracted_vars['wind_v']**2)
            extracted_vars['wind_speed'] = wind_speed

        # Convert relative humidity from fraction to percentage if needed
        if 'humidity' in extracted_vars:
            humidity_vals = extracted_vars['humidity']
            if np.max(humidity_vals) <= 1.0:  # Likely in fraction
                extracted_vars['humidity'] = humidity_vals * 100

        # Ensure we have the main variables, use defaults if missing
        if 'temperature' not in extracted_vars:
            extracted_vars['temperature'] = np.full(len(timestamps), 15.0)
        if 'humidity' not in extracted_vars:
            extracted_vars['humidity'] = np.full(len(timestamps), 60.0)
        if 'wind_speed' not in extracted_vars:
            extracted_vars['wind_speed'] = np.full(len(timestamps), 5.0)

        # Limit to at most 32 forecast steps
        max_hours = min(len(timestamps), 32)

        result = {
            'timestamps': timestamps[:max_hours],
            'temperature': extracted_vars['temperature'][:max_hours],
            'humidity': extracted_vars['humidity'][:max_hours],
            'wind_speed': extracted_vars['wind_speed'][:max_hours],
            'lat': lat,
            'lon': lon,
            'forecast_date': forecast_date.strftime('%Y-%m-%d %H:%M UTC'),
            'nearest_grid_lat': float(grid_lats[nearest_idx]),
            'nearest_grid_lon': float(grid_lons[nearest_idx])
        }

        # Add additional variables if available
        if 'pressure' in extracted_vars:
            result['pressure'] = extracted_vars['pressure'][:max_hours]
        if 'precipitation' in extracted_vars:
            result['precipitation'] = extracted_vars['precipitation'][:max_hours]

        return result

    except Exception as e:
        # Top-level boundary: any failure falls back to synthetic data with
        # the error recorded in the result.
        error_msg = f"Error fetching real forecast data: {str(e)}"
        print(error_msg)  # For debugging

        return _synthetic_forecast(lat, lon, error_msg)

def create_forecast_plot(forecast_data):
    """Create forecast visualization plots.

    Args:
        forecast_data: Either the dict produced by ``get_forecast_data`` or a
            plain string, which is passed through unchanged.

    Returns:
        A matplotlib ``Figure`` with a 2x2 layout: temperature, humidity and
        wind-speed line plots plus a text summary panel. A string input is
        returned as-is.
    """
    if isinstance(forecast_data, str):
        return forecast_data

    # Build the figure without pyplot. Figures created via ``plt.subplots``
    # stay in pyplot's global registry until closed, which leaks one figure
    # per request in a long-running server. A bare ``Figure`` is released
    # once nothing references it.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(12, 8))
    (ax1, ax2), (ax3, ax4) = fig.subplots(2, 2)

    timestamps = forecast_data['timestamps']

    # (axis, data key, line style, title, y-axis label) for each line plot
    panels = (
        (ax1, 'temperature', 'r-', 'Temperature Forecast (°C)', 'Temperature (°C)'),
        (ax2, 'humidity', 'b-', 'Humidity Forecast (%)', 'Humidity (%)'),
        (ax3, 'wind_speed', 'g-', 'Wind Speed Forecast (m/s)', 'Wind Speed (m/s)'),
    )
    for axis, key, line_style, title, y_label in panels:
        axis.plot(timestamps, forecast_data[key], line_style, linewidth=2)
        axis.set_title(title)
        axis.set_ylabel(y_label)
        axis.grid(True, alpha=0.3)
        axis.tick_params(axis='x', rotation=45)

    # Summary info
    ax4.axis('off')

    # Check if we have real data or fallback
    data_source = "Real DWD ICON Data" if 'error' not in forecast_data else "Fallback Synthetic Data"
    forecast_info = forecast_data.get('forecast_date', 'Unknown')

    # Grid point info (only present for real data)
    grid_info = ""
    if 'nearest_grid_lat' in forecast_data and 'nearest_grid_lon' in forecast_data:
        grid_info = f"Nearest Grid: {forecast_data['nearest_grid_lat']:.2f}°N, {forecast_data['nearest_grid_lon']:.2f}°E\n"

    summary_text = f"""
    Location: {forecast_data['lat']:.2f}°N, {forecast_data['lon']:.2f}°E
    {grid_info}Data Source: {data_source}
    Forecast: {forecast_info}
    
    Current Conditions:
    Temperature: {forecast_data['temperature'][0]:.1f}°C
    Humidity: {forecast_data['humidity'][0]:.1f}%
    Wind Speed: {forecast_data['wind_speed'][0]:.1f} m/s
    
    Forecast Range:
    Temp: {min(forecast_data['temperature']):.1f}°C to {max(forecast_data['temperature']):.1f}°C
    Humidity: {min(forecast_data['humidity']):.1f}% to {max(forecast_data['humidity']):.1f}%
    Wind: {min(forecast_data['wind_speed']):.1f} to {max(forecast_data['wind_speed']):.1f} m/s
    """

    # Add error info if present
    if 'error' in forecast_data:
        summary_text += f"\n\nNote: Using fallback data due to:\n{forecast_data['error'][:100]}..."

    # Green box for real data, yellow for fallback
    color = 'lightgreen' if 'error' not in forecast_data else 'lightyellow'
    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes, fontsize=9,
            verticalalignment='top', bbox=dict(boxstyle='round', facecolor=color, alpha=0.7))

    fig.tight_layout()
    fig.subplots_adjust(hspace=0.3)

    return fig

def process_map_click(lat, lon):
    """Produce the forecast summary text and chart for a coordinate pair.

    Args:
        lat: Latitude, or None when nothing has been selected.
        lon: Longitude, or None when nothing has been selected.

    Returns:
        tuple: ``(summary, plot)``. When either coordinate is None the
        summary is a prompt message and the plot is None.
    """
    coordinates_missing = lat is None or lon is None
    if coordinates_missing:
        return "Please click on the map to select a location", None

    # Fetch the data first, then render it
    forecast = get_forecast_data(lat, lon)
    figure = create_forecast_plot(forecast)

    # Anything that is not a dict is passed through as the summary itself
    if not isinstance(forecast, dict):
        return forecast, figure

    used_fallback = 'error' in forecast
    source_label = "Fallback Data" if used_fallback else "Real DWD ICON Data"
    run_label = forecast.get('forecast_date', '')

    pieces = [
        f"Forecast for location: {lat:.3f}°N, {lon:.3f}°E\n\nUsing: {source_label}\nForecast: {run_label}"
    ]
    if used_fallback:
        pieces.append(f"\n\nNote: Real data unavailable - {forecast['error'][:150]}...")

    return "".join(pieces), figure

def create_attribution_text():
    """Return the Markdown attribution notice for the DWD ICON Global dataset."""
    # Each entry is one line of the notice, including its leading indentation;
    # the first and last entries reproduce the text's opening newline and
    # trailing indentation.
    notice_lines = [
        "",
        "    ## Data Attribution",
        "    ",
        "    This application uses data from the **DWD ICON Global** dataset provided by OpenClimateFix.",
        "    ",
        "    - **Dataset**: DWD ICON Global Weather Forecasts",
        "    - **Source**: German Weather Service (Deutscher Wetterdienst - DWD)",
        "    - **Provider**: OpenClimateFix",
        "    - **License**: CC-BY-4.0",
        "    - **Dataset URL**: https://huggingface.co/datasets/openclimatefix/dwd-icon-global",
        "    ",
        "    **Citation**: Please cite the original DWD ICON model and the OpenClimateFix dataset when using this data.",
        "    ",
        "    **Real Data**: This application attempts to fetch real DWD ICON Global forecast data from the OpenClimateFix dataset. ",
        "    If real data is unavailable, it will fall back to synthetic data for demonstration purposes.",
        "    ",
        "    **Processing**: The application handles the icosahedral grid by finding the nearest grid point to your selected coordinates.",
        "    ",
    ]
    return "\n".join(notice_lines)

# Create the Gradio interface
# Layout: a header, then a row with the map (left) and forecast output (right),
# then a row of manual coordinate inputs, an attribution accordion, and a row
# of example locations.
with gr.Blocks(title="DWD ICON Global Weather Forecast") as app:
    gr.Markdown("# 🌦️ DWD ICON Global Weather Forecast")
    gr.Markdown("Click on the map to select a location and view the 4-day weather forecast from the DWD ICON Global model.")
    
    with gr.Row():
        with gr.Column(scale=2):
            # Map component: the Folium map is embedded as static HTML.
            # NOTE(review): no event handler in this block is attached to
            # map_html, so clicks on the map do not reach process_map_click;
            # forecasts are only triggered by the button and examples below.
            map_html = gr.HTML(create_map()._repr_html_(), label="Interactive Map")
            gr.Markdown("👆 Click anywhere on the map to select a location for forecast")
            
        with gr.Column(scale=2):
            # Forecast output: text summary plus the matplotlib figure
            forecast_text = gr.Textbox(
                label="Forecast Information",
                value="Click on the map to select a location",
                lines=3
            )
            forecast_plot = gr.Plot(label="Weather Forecast Charts")
    
    # Input fields for manual coordinate entry (defaults: 52.5, 13.4)
    with gr.Row():
        lat_input = gr.Number(
            label="Latitude", 
            value=52.5,
            minimum=-90,
            maximum=90,
            step=0.001,
            precision=3
        )
        lon_input = gr.Number(
            label="Longitude", 
            value=13.4,
            minimum=-180,
            maximum=180,
            step=0.001,
            precision=3
        )
        submit_btn = gr.Button("Get Forecast", variant="primary")
    
    # Attribution section (collapsed by default)
    with gr.Accordion("📋 Data Attribution & Information", open=False):
        gr.Markdown(create_attribution_text())
    
    # Event handlers: the button passes the two number inputs to
    # process_map_click and routes its (summary, plot) result to the outputs
    submit_btn.click(
        fn=process_map_click,
        inputs=[lat_input, lon_input],
        outputs=[forecast_text, forecast_plot]
    )
    
    # Example locations, wired to the same inputs, outputs and handler as
    # the button; caching of example results is disabled
    with gr.Row():
        gr.Examples(
            examples=[
                [52.5200, 13.4050],  # Berlin
                [48.8566, 2.3522],   # Paris
                [51.5074, -0.1278],  # London
                [55.7558, 37.6176],  # Moscow
                [41.9028, 12.4964],  # Rome
            ],
            inputs=[lat_input, lon_input],
            outputs=[forecast_text, forecast_plot],
            fn=process_map_click,
            cache_examples=False,
            label="Try these example locations:"
        )

# Start the app only when this file is run as a script.
# NOTE(review): share=True presumably requests a public share link and
# server_name="0.0.0.0" presumably binds all interfaces — confirm this
# exposure is intended for the deployment target.
if __name__ == "__main__":
    app.launch(share=True, server_name="0.0.0.0")