Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,365 +1,66 @@
|
|
| 1 |
-
#
|
| 2 |
-
import gradio as gr
|
| 3 |
import requests
|
| 4 |
-
import pandas as pd
|
| 5 |
-
import numpy as np
|
| 6 |
-
from datetime import datetime, timedelta
|
| 7 |
-
import matplotlib.pyplot as plt
|
| 8 |
-
from matplotlib.gridspec import GridSpec
|
| 9 |
-
import json
|
| 10 |
from bs4 import BeautifulSoup
|
| 11 |
-
|
| 12 |
-
def get_raw_data(station_id, hours=72, timeout=30):
    """
    Fetch recent observations for a station from the NWS API.

    Args:
        station_id: Station identifier interpolated into the request URL.
        hours: Length of the look-back window ending now, in hours.
            Defaults to 72, the window this function always used before.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded JSON payload, or None when the request or decoding
        fails. Failures are printed with a traceback rather than raised.
    """
    # Local imports: the module header only brings in datetime/timedelta.
    import traceback
    from datetime import timezone

    headers = {
        'User-Agent': '(Weather Data Viewer, contact@yourdomain.com)',
        'Accept': 'application/json'
    }

    # Timezone-aware "now" in UTC, truncated to whole seconds so both bounds
    # serialise as plain YYYY-MM-DDTHH:MM:SSZ strings.
    end_time = datetime.now(timezone.utc).replace(microsecond=0)
    start_time = end_time - timedelta(hours=hours)

    time_format = '%Y-%m-%dT%H:%M:%SZ'
    params = {
        'start': start_time.strftime(time_format),
        'end': end_time.strftime(time_format)
    }

    url = f"https://api.weather.gov/stations/{station_id}/observations"

    try:
        print("\nFetching observations...")
        print(f"URL: {url}")
        print(f"Time range: {start_time} to {end_time}")
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        print(f"Response status: {response.status_code}")

        if response.status_code != 200:
            # Show the error body before raise_for_status() turns it into an exception.
            print(f"Response content: {response.text}")
            response.raise_for_status()

        data = response.json()

        if 'features' in data:
            features = data['features']
            print(f"\nNumber of observations: {len(features)}")
            if features:
                print("\nFirst observation properties:")
                print(json.dumps(features[0]['properties'], indent=2))

                print("\nAll available property keys:")
                keys = set()
                for feature in features:
                    keys.update(feature['properties'].keys())
                print(sorted(keys))

        return data
    except Exception as e:
        # Top-level boundary: report the failure and signal it with None.
        print(f"Error fetching data: {e}")
        print("Full error details:")
        traceback.print_exc()
        return None
|
| 64 |
|
| 65 |
def scrape_snow_depth():
    """
    Scrape snow depth readings from the weather.gov timeseries page.

    Reads the first HTML table on the page and extracts its "Time" and
    "Snow Depth" columns.

    Returns:
        A DataFrame with columns "timestamp" (datetime, NaT where a cell
        could not be parsed) and "snowDepth" (numeric, NaN where a cell
        could not be parsed). An empty DataFrame is returned when the page
        cannot be fetched, the table or its columns are missing, or any
        other error occurs.
    """
    url = "https://www.weather.gov/wrh/timeseries?site=YCTIM&hours=720&units=english&chart=on&headers=on&obs=tabular&hourly=false&pview=standard&font=12&plot="
    try:
        # Bound the request so a stalled connection cannot hang the caller.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to fetch HTML page: {response.status_code}")
            return pd.DataFrame()
        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table')
        if table is None:
            print("No table found on the page.")
            return pd.DataFrame()

        # Extract headers from the first row; a table without rows yields
        # no headers and falls through to the missing-column message.
        header_row = table.find('tr')
        if header_row is None:
            headers = []
        else:
            headers = [th.get_text().strip() for th in header_row.find_all('th')]

        # Look for the columns "Time" and "Snow Depth"
        if "Time" not in headers or "Snow Depth" not in headers:
            print("Required columns 'Time' or 'Snow Depth' not found in table headers.")
            return pd.DataFrame()

        time_idx = headers.index("Time")
        snow_idx = headers.index("Snow Depth")
        min_cells = max(time_idx, snow_idx) + 1

        data = []
        for row in table.find_all('tr')[1:]:  # Skip header row
            cells = row.find_all('td')
            if len(cells) < min_cells:
                # Short rows cannot hold both columns; ignore them.
                continue
            data.append((cells[time_idx].get_text().strip(),
                         cells[snow_idx].get_text().strip()))

        df = pd.DataFrame(data, columns=["Time", "snowDepth"])
        # Coerce cell by cell so one malformed value does not blank out
        # every timestamp in the column.
        df["timestamp"] = pd.to_datetime(df["Time"], errors='coerce')
        df["snowDepth"] = pd.to_numeric(df["snowDepth"], errors='coerce')
        print("Scraped snow depth data:")
        print(df.head())
        return df[["timestamp", "snowDepth"]]
    except Exception as e:
        # Best-effort scrape: report the problem and hand back an empty frame.
        print(f"Error scraping snow depth: {e}")
        return pd.DataFrame()
|
| 118 |
-
|
| 119 |
-
def parse_raw_data(data):
    """
    Parse the raw JSON data into a DataFrame.

    Args:
        data: Decoded API payload; expected to contain a 'features' list
            whose items each carry a 'properties' mapping.

    Returns:
        None when `data` is empty or has no 'features' key. Otherwise a
        DataFrame with one row per feature and the columns 'timestamp',
        'temperature', 'wind_speed', 'wind_direction', plus one column for
        every property whose name contains "snow" (case-insensitive).
    """
    if not data or 'features' not in data:
        return None

    records = []
    for feature in data['features']:
        props = feature.get('properties')
        if not isinstance(props, dict):
            # Nothing to extract from a feature without a properties mapping.
            continue

        # Extract all properties whose name mentions 'snow'
        snow_fields = {k: v for k, v in props.items() if 'snow' in k.lower()}
        if snow_fields:
            print("\nFound snow-related fields:")
            for k, v in snow_fields.items():
                print(f"{k}: {v}")

        record = {
            'timestamp': props.get('timestamp'),
            'temperature': _measurement_value(props.get('temperature')),
            'wind_speed': _measurement_value(props.get('windSpeed')),
            'wind_direction': _measurement_value(props.get('windDirection'))
        }

        # Add all snow-related fields to the record, unwrapping
        # {'value': ...} mappings and keeping anything else as-is.
        for k, v in snow_fields.items():
            if isinstance(v, dict) and 'value' in v:
                record[k] = v['value']
            else:
                record[k] = v

        records.append(record)

    return pd.DataFrame(records)


def _measurement_value(field):
    """Return field['value'] for a mapping, or None for a missing/null field."""
    # A key that is present but null would make a chained .get() raise.
    if isinstance(field, dict):
        return field.get('value')
    return None
|
| 161 |
-
|
| 162 |
-
def process_weather_data(df):
    """
    Process the weather DataFrame.

    Parses the 'timestamp' column, adds a 'date' column, and converts
    'temperature' from Celsius to Fahrenheit and 'wind_speed' from km/h
    to mph. The frame is modified in place and also returned.

    Args:
        df: DataFrame with 'timestamp', 'temperature' and 'wind_speed'
            columns, or None.

    Returns:
        The processed DataFrame, or None when `df` is None or empty.
    """
    if df is None or df.empty:
        return None

    # Convert timestamp
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['date'] = df['timestamp'].dt.date

    # Convert every reading that is present. Coercing to numeric turns
    # missing values into NaN, which simply propagate through the
    # arithmetic, so one gap no longer leaves the whole column unconverted.
    celsius = pd.to_numeric(df['temperature'], errors='coerce')
    df['temperature'] = (celsius * 9 / 5) + 32

    kmh = pd.to_numeric(df['wind_speed'], errors='coerce')
    df['wind_speed'] = kmh * 0.621371  # km/h to mph

    return df
|
| 182 |
-
|
| 183 |
-
def create_wind_rose(ax, data, title):
|
| 184 |
-
"""
|
| 185 |
-
Create a wind rose subplot
|
| 186 |
-
"""
|
| 187 |
-
if data.empty or data['wind_direction'].isna().all() or data['wind_speed'].isna().all():
|
| 188 |
-
ax.text(0.5, 0.5, 'No wind data available',
|
| 189 |
-
horizontalalignment='center',
|
| 190 |
-
verticalalignment='center',
|
| 191 |
-
transform=ax.transAxes)
|
| 192 |
-
ax.set_title(title)
|
| 193 |
-
return
|
| 194 |
-
|
| 195 |
-
plot_data = data.copy()
|
| 196 |
-
|
| 197 |
-
direction_bins = np.arange(0, 361, 45)
|
| 198 |
-
directions = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
|
| 199 |
-
|
| 200 |
-
mask = plot_data['wind_direction'].notna() & plot_data['wind_speed'].notna()
|
| 201 |
-
plot_data = plot_data[mask]
|
| 202 |
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
horizontalalignment='center',
|
| 206 |
-
verticalalignment='center',
|
| 207 |
-
transform=ax.transAxes)
|
| 208 |
-
ax.set_title(title)
|
| 209 |
-
return
|
| 210 |
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
wind_stats = plot_data.groupby('direction_bin', observed=True)['wind_speed'].mean()
|
| 217 |
-
|
| 218 |
-
all_directions = pd.Series(0.0, index=directions)
|
| 219 |
-
wind_stats = wind_stats.combine_first(all_directions)
|
| 220 |
-
|
| 221 |
-
angles = np.linspace(0, 2*np.pi, len(directions), endpoint=False)
|
| 222 |
-
values = [wind_stats[d] for d in directions]
|
| 223 |
-
|
| 224 |
-
if any(v > 0 for v in values):
|
| 225 |
-
bars = ax.bar(angles, values, width=0.5, alpha=0.6)
|
| 226 |
-
ax.set_xticks(angles)
|
| 227 |
-
ax.set_xticklabels(directions)
|
| 228 |
-
else:
|
| 229 |
-
ax.text(0.5, 0.5, 'No significant wind',
|
| 230 |
-
horizontalalignment='center',
|
| 231 |
-
verticalalignment='center',
|
| 232 |
-
transform=ax.transAxes)
|
| 233 |
-
|
| 234 |
-
ax.set_title(title)
|
| 235 |
|
| 236 |
-
|
| 237 |
-
""
|
| 238 |
-
|
| 239 |
-
"""
|
| 240 |
-
fig = plt.figure(figsize=(20, 24))
|
| 241 |
-
gs = GridSpec(5, 2, figure=fig)
|
| 242 |
-
|
| 243 |
-
ax1 = fig.add_subplot(gs[0, :])
|
| 244 |
-
ax2 = fig.add_subplot(gs[1, :])
|
| 245 |
-
ax3 = fig.add_subplot(gs[2, :])
|
| 246 |
-
|
| 247 |
-
if not df['temperature'].isna().all():
|
| 248 |
-
ax1.plot(df['timestamp'], df['temperature'], linewidth=2)
|
| 249 |
-
ax1.set_title('Temperature Over Time')
|
| 250 |
-
ax1.set_ylabel('Temperature (°F)')
|
| 251 |
-
ax1.set_xlabel('')
|
| 252 |
-
ax1.grid(True)
|
| 253 |
-
|
| 254 |
-
if not df['wind_speed'].isna().all():
|
| 255 |
-
ax2.plot(df['timestamp'], df['wind_speed'], linewidth=2)
|
| 256 |
-
ax2.set_title('Wind Speed Over Time')
|
| 257 |
-
ax2.set_ylabel('Wind Speed (mph)')
|
| 258 |
-
ax2.set_xlabel('')
|
| 259 |
-
ax2.grid(True)
|
| 260 |
-
|
| 261 |
-
# Plot snow depth if available
|
| 262 |
-
if 'snowDepth' in df.columns and not df['snowDepth'].isna().all():
|
| 263 |
-
ax3.plot(df['timestamp'], df['snowDepth'], linewidth=2)
|
| 264 |
-
ax3.set_ylim(0, 80) # Fixed y-axis limit to 80 inches
|
| 265 |
-
else:
|
| 266 |
-
ax3.text(0.5, 0.5, 'No snow depth data available',
|
| 267 |
-
horizontalalignment='center',
|
| 268 |
-
verticalalignment='center',
|
| 269 |
-
transform=ax3.transAxes)
|
| 270 |
-
ax3.set_title('Snow Depth')
|
| 271 |
-
ax3.set_ylabel('Snow Depth (inches)')
|
| 272 |
-
ax3.set_xlabel('')
|
| 273 |
-
ax3.grid(True)
|
| 274 |
-
|
| 275 |
-
for ax in [ax1, ax2, ax3]:
|
| 276 |
-
ax.tick_params(axis='x', rotation=45)
|
| 277 |
-
ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d %H:%M'))
|
| 278 |
-
|
| 279 |
-
dates = sorted(df['date'].unique())
|
| 280 |
-
wind_axes = []
|
| 281 |
-
for i, date in enumerate(dates):
|
| 282 |
-
if i < 2:
|
| 283 |
-
ax = fig.add_subplot(gs[4, i], projection='polar')
|
| 284 |
-
day_data = df[df['date'] == date].copy()
|
| 285 |
-
create_wind_rose(ax, day_data, pd.to_datetime(date).strftime('%Y-%m-%d'))
|
| 286 |
-
wind_axes.append(ax)
|
| 287 |
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
def get_weather_data(station_id, hours):
    """
    Main function to get and process weather data.

    Fetches observations for the station, parses and processes them,
    limits them to the requested number of hours, and then tries to attach
    scraped snow depth readings by nearest timestamp.

    Args:
        station_id: Station identifier passed to get_raw_data.
        hours: Number of hours of data to keep, counted back from the most
            recent observation. A falsy value keeps everything fetched.

    Returns:
        A (df, error) tuple: (DataFrame, None) on success, or
        (None, message) when any stage fails.
    """
    try:
        # Get raw data from API
        raw_data = get_raw_data(station_id)
        if raw_data is None:
            return None, "Failed to fetch data"

        # Parse raw data
        df = parse_raw_data(raw_data)
        if df is None:
            return None, "Failed to parse data"

        # Process API data
        df = process_weather_data(df)
        if df is None:
            return None, "Failed to process data"

        # Honour the requested window; measured from the latest observation
        # so the comparison never mixes differently-zoned timestamps.
        latest = df['timestamp'].max()
        if hours and pd.notna(latest):
            cutoff = latest - pd.Timedelta(hours=hours)
            df = df[df['timestamp'] >= cutoff]

        # Scrape snow depth data and merge with API data
        snow_df = scrape_snow_depth()
        if snow_df is not None and not snow_df.empty:
            # merge_asof needs sorted keys without nulls on both sides.
            left = df.dropna(subset=['timestamp']).sort_values('timestamp')
            right = snow_df.dropna(subset=['timestamp']).sort_values('timestamp')
            try:
                # Merge using nearest timestamp within a 30-minute tolerance
                df = pd.merge_asof(left, right, on='timestamp',
                                   tolerance=pd.Timedelta('30min'),
                                   direction='nearest')
            except ValueError as merge_error:
                # Snow depth is an optional extra: keep the API data if the
                # merge is rejected.
                # NOTE(review): the API timestamps look timezone-aware while
                # the scraped ones are parsed without a zone; confirm and
                # align them so this merge can succeed.
                print(f"Could not merge snow depth data: {merge_error}")

        print("\nProcessed data sample:")
        print(df.head())

        return df, None

    except Exception as e:
        return None, f"Error: {str(e)}"
|
| 327 |
|
| 328 |
-
|
| 329 |
-
""
|
| 330 |
-
Main function to fetch data and create visualization
|
| 331 |
-
"""
|
| 332 |
-
df, error = get_weather_data(station_id, hours)
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
return fig, "Data fetched successfully!"
|
| 340 |
-
|
| 341 |
-
return None, "No data available for the specified parameters."
|
| 342 |
-
|
| 343 |
-
# Create Gradio interface
|
| 344 |
-
with gr.Blocks() as demo:
|
| 345 |
-
gr.Markdown("# Weather Data Viewer")
|
| 346 |
-
gr.Markdown("Displays temperature, wind speed, and snow data from NWS stations")
|
| 347 |
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
hours = gr.Slider(minimum=24, maximum=168, value=72,
|
| 351 |
-
label="Hours of Data", step=24)
|
| 352 |
|
| 353 |
-
|
|
|
|
| 354 |
|
| 355 |
-
|
| 356 |
-
message = gr.Textbox(label="Status")
|
| 357 |
-
|
| 358 |
-
fetch_btn.click(
|
| 359 |
-
fn=fetch_and_display,
|
| 360 |
-
inputs=[station_id, hours],
|
| 361 |
-
outputs=[plot_output, message]
|
| 362 |
-
)
|
| 363 |
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scrape_snow_depth.py
|
|
|
|
| 2 |
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
+
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def scrape_snow_depth(site="YCTIM", hours=720, timeout=30):
    """
    Scrape snow depth data from the weather.gov timeseries page.

    Extracts the 'Time' and 'Snow Depth' columns from the first table on
    the page.

    Args:
        site: Station identifier sent as the URL's `site` parameter.
            Defaults to "YCTIM", the station this function always used.
        hours: Value of the URL's `hours` parameter. Defaults to 720.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        A DataFrame with columns "Time" (datetime, NaT where a cell could
        not be parsed) and "Snow Depth" (numeric, NaN where a cell could
        not be parsed), or None when the page cannot be fetched or the
        expected table or columns are missing.
    """
    # With the default arguments this is exactly the original URL.
    url = (
        "https://www.weather.gov/wrh/timeseries"
        f"?site={site}&hours={hours}&units=english&chart=on&headers=on"
        "&obs=tabular&hourly=false&pview=standard&font=12&plot="
    )

    # Get the webpage content. Network failures are reported the same way
    # as every other failure here: a message and a None result.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: Could not fetch the page. {exc}")
        return None
    if response.status_code != 200:
        print(f"Error: Could not fetch the page. Status code: {response.status_code}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")

    # Find the first table on the page
    # NOTE(review): if the page fills its table client-side, the static
    # HTML will contain no table and this returns None - confirm.
    table = soup.find("table")
    if table is None:
        print("No table found on the page.")
        return None

    # Extract the header row to identify the column indices for 'Time' and
    # 'Snow Depth'; a table without rows simply has no headers.
    header_row = table.find("tr")
    if header_row is None:
        headers = []
    else:
        headers = [th.get_text(strip=True) for th in header_row.find_all("th")]

    if "Time" not in headers or "Snow Depth" not in headers:
        print("Required columns 'Time' or 'Snow Depth' not found in the table headers.")
        return None

    time_index = headers.index("Time")
    snow_depth_index = headers.index("Snow Depth")
    last_needed = max(time_index, snow_depth_index)

    # Extract data rows (skip the header)
    rows = []
    for tr in table.find_all("tr")[1:]:
        cells = tr.find_all("td")
        # Ensure there are enough cells in the row
        if len(cells) > last_needed:
            rows.append((cells[time_index].get_text(strip=True),
                         cells[snow_depth_index].get_text(strip=True)))

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(rows, columns=["Time", "Snow Depth"])

    # Convert the "Time" column to datetime objects
    df["Time"] = pd.to_datetime(df["Time"], errors="coerce")

    # Convert "Snow Depth" to a numeric value (the page is requested with
    # units=english)
    df["Snow Depth"] = pd.to_numeric(df["Snow Depth"], errors="coerce")

    return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
+
if __name__ == "__main__":
    # Script entry point: run the scraper once and show a preview of the
    # result, or say so when nothing came back.
    snow_df = scrape_snow_depth()
    if snow_df is None:
        print("No snow depth data could be scraped.")
    else:
        print("Sample of scraped snow depth data:")
        print(snow_df.head())
|