nakas commited on
Commit
4ecd995
·
verified ·
1 Parent(s): 8dc02da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -345
app.py CHANGED
@@ -1,365 +1,66 @@
1
- # app.py
2
- import gradio as gr
3
  import requests
4
- import pandas as pd
5
- import numpy as np
6
- from datetime import datetime, timedelta
7
- import matplotlib.pyplot as plt
8
- from matplotlib.gridspec import GridSpec
9
- import json
10
  from bs4 import BeautifulSoup
11
-
12
- def get_raw_data(station_id):
13
- """
14
- Get raw data from the NWS API
15
- """
16
- headers = {
17
- 'User-Agent': '(Weather Data Viewer, contact@yourdomain.com)',
18
- 'Accept': 'application/json'
19
- }
20
-
21
- # Calculate correct date range for last 3 days
22
- end_time = datetime.utcnow()
23
- start_time = end_time - timedelta(hours=72) # Last 3 days
24
-
25
- params = {
26
- 'start': start_time.isoformat() + 'Z',
27
- 'end': end_time.isoformat() + 'Z'
28
- }
29
-
30
- url = f"https://api.weather.gov/stations/{station_id}/observations"
31
-
32
- try:
33
- print("\nFetching observations...")
34
- print(f"URL: {url}")
35
- print(f"Time range: {start_time} to {end_time}")
36
- response = requests.get(url, headers=headers, params=params)
37
- print(f"Response status: {response.status_code}")
38
-
39
- if response.status_code != 200:
40
- print(f"Response content: {response.text}")
41
- response.raise_for_status()
42
-
43
- data = response.json()
44
-
45
- if 'features' in data:
46
- print(f"\nNumber of observations: {len(data['features'])}")
47
- if len(data['features']) > 0:
48
- print("\nFirst observation properties:")
49
- print(json.dumps(data['features'][0]['properties'], indent=2))
50
-
51
- print("\nAll available property keys:")
52
- keys = set()
53
- for feature in data['features']:
54
- keys.update(feature['properties'].keys())
55
- print(sorted(list(keys)))
56
-
57
- return data
58
- except Exception as e:
59
- print(f"Error fetching data: {e}")
60
- print("Full error details:")
61
- import traceback
62
- traceback.print_exc()
63
- return None
64
 
65
  def scrape_snow_depth():
66
  """
67
- Scrape snow depth data from the HTML timeseries page.
68
- Note: The structure of the HTML page may change over time.
69
  """
70
  url = "https://www.weather.gov/wrh/timeseries?site=YCTIM&hours=720&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=standard&font=12&plot="
71
- try:
72
- response = requests.get(url)
73
- if response.status_code != 200:
74
- print(f"Failed to fetch HTML page: {response.status_code}")
75
- return pd.DataFrame()
76
- soup = BeautifulSoup(response.text, 'html.parser')
77
- table = soup.find('table')
78
- if table is None:
79
- print("No table found on the page.")
80
- return pd.DataFrame()
81
-
82
- # Extract headers from the table
83
- header_row = table.find('tr')
84
- headers = [th.get_text().strip() for th in header_row.find_all('th')]
85
-
86
- # Look for the columns "Time" and "Snow Depth"
87
- if "Time" not in headers or "Snow Depth" not in headers:
88
- print("Required columns 'Time' or 'Snow Depth' not found in table headers.")
89
- return pd.DataFrame()
90
-
91
- time_idx = headers.index("Time")
92
- snow_idx = headers.index("Snow Depth")
93
-
94
- data = []
95
- rows = table.find_all('tr')[1:] # Skip header row
96
- for row in rows:
97
- cells = row.find_all('td')
98
- if len(cells) < max(time_idx, snow_idx) + 1:
99
- continue
100
- time_str = cells[time_idx].get_text().strip()
101
- snow_str = cells[snow_idx].get_text().strip()
102
- data.append((time_str, snow_str))
103
-
104
- df = pd.DataFrame(data, columns=["Time", "snowDepth"])
105
- # Convert "Time" column to datetime. Adjust the format if necessary.
106
- try:
107
- df["timestamp"] = pd.to_datetime(df["Time"])
108
- except Exception as e:
109
- print("Error parsing timestamp from scraped data:", e)
110
- df["timestamp"] = pd.NaT
111
- df["snowDepth"] = pd.to_numeric(df["snowDepth"], errors='coerce')
112
- print("Scraped snow depth data:")
113
- print(df.head())
114
- return df[["timestamp", "snowDepth"]]
115
- except Exception as e:
116
- print(f"Error scraping snow depth: {e}")
117
- return pd.DataFrame()
118
-
119
- def parse_raw_data(data):
120
- """
121
- Parse the raw JSON data into a DataFrame
122
- """
123
- if not data or 'features' not in data:
124
- return None
125
-
126
- records = []
127
- for feature in data['features']:
128
- props = feature['properties']
129
-
130
- # Extract all properties starting with 'snow'
131
- snow_fields = {k: v for k, v in props.items() if 'snow' in k.lower()}
132
- if snow_fields:
133
- print("\nFound snow-related fields:")
134
- for k, v in snow_fields.items():
135
- print(f"{k}: {v}")
136
-
137
- record = {
138
- 'timestamp': props['timestamp'],
139
- 'temperature': props.get('temperature', {}).get('value'),
140
- 'wind_speed': props.get('windSpeed', {}).get('value'),
141
- 'wind_direction': props.get('windDirection', {}).get('value')
142
- }
143
-
144
- # Add all snow-related fields to the record
145
- for k, v in snow_fields.items():
146
- if isinstance(v, dict) and 'value' in v:
147
- record[k] = v['value']
148
- else:
149
- record[k] = v
150
-
151
- records.append(record)
152
-
153
- df = pd.DataFrame(records)
154
 
155
- print("\nDataFrame columns:")
156
- print(df.columns.tolist())
157
- print("\nSample of raw data:")
158
- print(df.head())
159
-
160
- return df
161
-
162
- def process_weather_data(df):
163
- """
164
- Process the weather DataFrame
165
- """
166
- if df is None or df.empty:
167
  return None
168
-
169
- # Convert timestamp
170
- df['timestamp'] = pd.to_datetime(df['timestamp'])
171
- df['date'] = df['timestamp'].dt.date
172
-
173
- # Convert temperature from C to F if not null
174
- if df['temperature'].notna().all():
175
- df['temperature'] = (df['temperature'] * 9/5) + 32
176
-
177
- # Convert wind speed from km/h to mph if not null (original unit km/h)
178
- if df['wind_speed'].notna().all():
179
- df['wind_speed'] = df['wind_speed'] * 0.621371 # km/h to mph
180
-
181
- return df
182
-
183
- def create_wind_rose(ax, data, title):
184
- """
185
- Create a wind rose subplot
186
- """
187
- if data.empty or data['wind_direction'].isna().all() or data['wind_speed'].isna().all():
188
- ax.text(0.5, 0.5, 'No wind data available',
189
- horizontalalignment='center',
190
- verticalalignment='center',
191
- transform=ax.transAxes)
192
- ax.set_title(title)
193
- return
194
-
195
- plot_data = data.copy()
196
-
197
- direction_bins = np.arange(0, 361, 45)
198
- directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
199
-
200
- mask = plot_data['wind_direction'].notna() & plot_data['wind_speed'].notna()
201
- plot_data = plot_data[mask]
202
 
203
- if plot_data.empty:
204
- ax.text(0.5, 0.5, 'No valid wind data',
205
- horizontalalignment='center',
206
- verticalalignment='center',
207
- transform=ax.transAxes)
208
- ax.set_title(title)
209
- return
210
 
211
- plot_data.loc[:, 'direction_bin'] = pd.cut(plot_data['wind_direction'],
212
- bins=direction_bins,
213
- labels=directions,
214
- include_lowest=True)
215
-
216
- wind_stats = plot_data.groupby('direction_bin', observed=True)['wind_speed'].mean()
217
-
218
- all_directions = pd.Series(0.0, index=directions)
219
- wind_stats = wind_stats.combine_first(all_directions)
220
-
221
- angles = np.linspace(0, 2*np.pi, len(directions), endpoint=False)
222
- values = [wind_stats[d] for d in directions]
223
-
224
- if any(v > 0 for v in values):
225
- bars = ax.bar(angles, values, width=0.5, alpha=0.6)
226
- ax.set_xticks(angles)
227
- ax.set_xticklabels(directions)
228
- else:
229
- ax.text(0.5, 0.5, 'No significant wind',
230
- horizontalalignment='center',
231
- verticalalignment='center',
232
- transform=ax.transAxes)
233
-
234
- ax.set_title(title)
235
 
236
- def create_visualizations(df):
237
- """
238
- Create static visualizations using matplotlib
239
- """
240
- fig = plt.figure(figsize=(20, 24))
241
- gs = GridSpec(5, 2, figure=fig)
242
-
243
- ax1 = fig.add_subplot(gs[0, :])
244
- ax2 = fig.add_subplot(gs[1, :])
245
- ax3 = fig.add_subplot(gs[2, :])
246
-
247
- if not df['temperature'].isna().all():
248
- ax1.plot(df['timestamp'], df['temperature'], linewidth=2)
249
- ax1.set_title('Temperature Over Time')
250
- ax1.set_ylabel('Temperature (°F)')
251
- ax1.set_xlabel('')
252
- ax1.grid(True)
253
-
254
- if not df['wind_speed'].isna().all():
255
- ax2.plot(df['timestamp'], df['wind_speed'], linewidth=2)
256
- ax2.set_title('Wind Speed Over Time')
257
- ax2.set_ylabel('Wind Speed (mph)')
258
- ax2.set_xlabel('')
259
- ax2.grid(True)
260
-
261
- # Plot snow depth if available
262
- if 'snowDepth' in df.columns and not df['snowDepth'].isna().all():
263
- ax3.plot(df['timestamp'], df['snowDepth'], linewidth=2)
264
- ax3.set_ylim(0, 80) # Fixed y-axis limit to 80 inches
265
- else:
266
- ax3.text(0.5, 0.5, 'No snow depth data available',
267
- horizontalalignment='center',
268
- verticalalignment='center',
269
- transform=ax3.transAxes)
270
- ax3.set_title('Snow Depth')
271
- ax3.set_ylabel('Snow Depth (inches)')
272
- ax3.set_xlabel('')
273
- ax3.grid(True)
274
-
275
- for ax in [ax1, ax2, ax3]:
276
- ax.tick_params(axis='x', rotation=45)
277
- ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d %H:%M'))
278
-
279
- dates = sorted(df['date'].unique())
280
- wind_axes = []
281
- for i, date in enumerate(dates):
282
- if i < 2:
283
- ax = fig.add_subplot(gs[4, i], projection='polar')
284
- day_data = df[df['date'] == date].copy()
285
- create_wind_rose(ax, day_data, pd.to_datetime(date).strftime('%Y-%m-%d'))
286
- wind_axes.append(ax)
287
 
288
- plt.tight_layout()
289
-
290
- return fig
291
-
292
- def get_weather_data(station_id, hours):
293
- """
294
- Main function to get and process weather data
295
- """
296
- try:
297
- # Get raw data from API
298
- raw_data = get_raw_data(station_id)
299
- if raw_data is None:
300
- return None, "Failed to fetch data"
301
-
302
- # Parse raw data
303
- df = parse_raw_data(raw_data)
304
- if df is None:
305
- return None, "Failed to parse data"
306
-
307
- # Process API data
308
- df = process_weather_data(df)
309
- if df is None:
310
- return None, "Failed to process data"
311
-
312
- # Scrape snow depth data and merge with API data
313
- snow_df = scrape_snow_depth()
314
- if not snow_df.empty:
315
- df = df.sort_values('timestamp')
316
- snow_df = snow_df.sort_values('timestamp')
317
- # Merge using nearest timestamp within a 30-minute tolerance
318
- df = pd.merge_asof(df, snow_df, on='timestamp', tolerance=pd.Timedelta('30min'), direction='nearest')
319
-
320
- print("\nProcessed data sample:")
321
- print(df.head())
322
-
323
- return df, None
324
-
325
- except Exception as e:
326
- return None, f"Error: {str(e)}"
327
 
328
- def fetch_and_display(station_id, hours):
329
- """
330
- Main function to fetch data and create visualization
331
- """
332
- df, error = get_weather_data(station_id, hours)
333
 
334
- if error:
335
- return None, error
 
 
 
 
 
 
 
336
 
337
- if df is not None and not df.empty:
338
- fig = create_visualizations(df)
339
- return fig, "Data fetched successfully!"
340
-
341
- return None, "No data available for the specified parameters."
342
-
343
- # Create Gradio interface
344
- with gr.Blocks() as demo:
345
- gr.Markdown("# Weather Data Viewer")
346
- gr.Markdown("Displays temperature, wind speed, and snow data from NWS stations")
347
 
348
- with gr.Row():
349
- station_id = gr.Textbox(label="Station ID", value="YCTIM")
350
- hours = gr.Slider(minimum=24, maximum=168, value=72,
351
- label="Hours of Data", step=24)
352
 
353
- fetch_btn = gr.Button("Fetch Data")
 
354
 
355
- plot_output = gr.Plot()
356
- message = gr.Textbox(label="Status")
357
-
358
- fetch_btn.click(
359
- fn=fetch_and_display,
360
- inputs=[station_id, hours],
361
- outputs=[plot_output, message]
362
- )
363
 
364
- # Launch the app
365
- demo.launch()
 
 
 
 
 
 
1
+ # scrape_snow_depth.py
 
2
  import requests
 
 
 
 
 
 
3
  from bs4 import BeautifulSoup
4
+ import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
def scrape_snow_depth():
    """Scrape snow depth observations for station YCTIM from weather.gov.

    Fetches the tabular timeseries page and extracts the 'Time' and
    'Snow Depth' columns from the first HTML table.

    Returns:
        pandas.DataFrame with columns ['Time', 'Snow Depth'] — 'Time' as
        datetimes and 'Snow Depth' as numeric inches (unparseable cells
        become NaT/NaN) — or None if the page cannot be fetched or the
        table/required columns are missing.
    """
    url = "https://www.weather.gov/wrh/timeseries?site=YCTIM&hours=720&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=standard&font=12&plot="

    # Get the webpage content. A timeout prevents hanging indefinitely on a
    # stalled server, and a connection failure follows the same
    # print-and-return-None error contract as a bad status code.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: Could not fetch the page. {exc}")
        return None
    if response.status_code != 200:
        print(f"Error: Could not fetch the page. Status code: {response.status_code}")
        return None

    return _parse_snow_table(response.text)


def _parse_snow_table(html):
    """Parse the first table in *html* into a (Time, Snow Depth) DataFrame.

    Returns None when no table is present or the required columns are
    missing; otherwise a DataFrame with a datetime 'Time' column and a
    numeric 'Snow Depth' column.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Find the first table on the page
    table = soup.find("table")
    if table is None:
        print("No table found on the page.")
        return None

    # Extract the header row to identify the column indices for 'Time' and 'Snow Depth'
    header_row = table.find("tr")
    headers = [th.get_text(strip=True) for th in header_row.find_all("th")]

    if "Time" not in headers or "Snow Depth" not in headers:
        print("Required columns 'Time' or 'Snow Depth' not found in the table headers.")
        return None

    time_index = headers.index("Time")
    snow_depth_index = headers.index("Snow Depth")

    # Extract data rows (skip the header); rows too short to hold both
    # columns of interest are silently dropped.
    rows = []
    for tr in table.find_all("tr")[1:]:
        cells = tr.find_all("td")
        if len(cells) > max(time_index, snow_depth_index):
            time_text = cells[time_index].get_text(strip=True)
            snow_text = cells[snow_depth_index].get_text(strip=True)
            rows.append((time_text, snow_text))

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(rows, columns=["Time", "Snow Depth"])

    # Coerce types: bad timestamps become NaT, bad depths become NaN,
    # rather than raising on a single malformed cell.
    df["Time"] = pd.to_datetime(df["Time"], errors="coerce")
    df["Snow Depth"] = pd.to_numeric(df["Snow Depth"], errors="coerce")

    return df
 
 
 
 
 
 
 
59
 
60
if __name__ == "__main__":
    # Manual smoke test: run the scraper and preview what it returned.
    snow_df = scrape_snow_depth()
    if snow_df is None:
        print("No snow depth data could be scraped.")
    else:
        print("Sample of scraped snow depth data:")
        print(snow_df.head())