Spaces:
Sleeping
Sleeping
| """Flask backend for HRRR Weather Dashboard - HF Spaces deployment.""" | |
| import json | |
| import os | |
| from datetime import date | |
| from functools import lru_cache | |
| import pandas as pd | |
| import snowflake.connector | |
| from flask import Flask, jsonify, render_template, request, Response | |
| from geopy.extra.rate_limiter import RateLimiter | |
| from geopy.geocoders import Nominatim | |
| app = Flask(__name__) | |
| # --------------------------------------------------------------------------- | |
| # Constants | |
| # --------------------------------------------------------------------------- | |
| HRRR_VARIABLES = { | |
| "Temperature": "TMP", | |
| "Wind": "WIND", | |
| "Gust": "GUST", | |
| "Precipitation": "APCP", | |
| } | |
| HRRR_UNITS = { | |
| "Temperature": "F", | |
| "Wind": "mph", | |
| "Gust": "mph", | |
| "Precipitation": "in", | |
| } | |
| C_TO_F = lambda c: c * 9 / 5 + 32 | |
| MS_TO_MPH = lambda ms: ms * 2.23694 | |
| MM_TO_IN = lambda mm: mm / 25.4 | |
| CONVERTERS = { | |
| "TMP": C_TO_F, | |
| "WIND": MS_TO_MPH, | |
| "GUST": MS_TO_MPH, | |
| "APCP": MM_TO_IN, | |
| } | |
| # --------------------------------------------------------------------------- | |
| # Snowflake connection (use env vars / HF Secrets) | |
| # --------------------------------------------------------------------------- | |
| SF_ACCOUNT = os.environ.get("SF_ACCOUNT", "") | |
| SF_USER = os.environ.get("SF_USER", "") | |
| SF_PASSWORD = os.environ.get("SF_PASSWORD", "") | |
| SF_DATABASE = os.environ.get("SF_DATABASE", "PROD_ENT_BYOD_WORKSPACE_DB") | |
| SF_SCHEMA = os.environ.get("SF_SCHEMA", "BYOD_NOAA") | |
| SF_WAREHOUSE = os.environ.get("SF_WAREHOUSE", "") | |
| SF_ROLE = os.environ.get("SF_ROLE", "") | |
| def get_snowflake_connection(): | |
| """Create a Snowflake connection using password auth (for containers).""" | |
| params = { | |
| "account": SF_ACCOUNT, | |
| "user": SF_USER, | |
| "password": SF_PASSWORD, | |
| "database": SF_DATABASE, | |
| "schema": SF_SCHEMA, | |
| } | |
| if SF_WAREHOUSE: | |
| params["warehouse"] = SF_WAREHOUSE | |
| if SF_ROLE: | |
| params["role"] = SF_ROLE | |
| return snowflake.connector.connect(**params) | |
| # --------------------------------------------------------------------------- | |
| # Data helpers | |
| # --------------------------------------------------------------------------- | |
| def geocode(address: str) -> tuple: | |
| """Return (lat, lon) for an address.""" | |
| try: | |
| address2 = address.replace(" ", "+").replace(",", "%2C") | |
| url = ( | |
| "https://geocoding.geo.census.gov/geocoder/locations/onelineaddress" | |
| f"?address={address2}&benchmark=2020&format=json" | |
| ) | |
| df = pd.read_json(url) | |
| coords = df.iloc[:1, 0][0][0]["coordinates"] | |
| return (coords["y"], coords["x"]) | |
| except Exception: | |
| geolocator = Nominatim(user_agent="HRRRWeatherDashboard") | |
| geocode_fn = RateLimiter(geolocator.geocode, min_delay_seconds=1) | |
| location = geocode_fn(address) | |
| if location is None: | |
| raise ValueError(f"Could not geocode: {address}") | |
| return (location.latitude, location.longitude) | |
| def get_row_col(conn, lon: float, lat: float) -> tuple: | |
| """Call Snowflake UDF to convert lat/lon to HRRR grid row/col.""" | |
| cur = conn.cursor() | |
| cur.execute( | |
| f"SELECT PROD_ENT_BYOD_WORKSPACE_DB.BYOD_NOAA.LAT_LON_TO_ROW_COL_HRRR({lon}, {lat}) AS result" | |
| ) | |
| result = cur.fetchone()[0] | |
| parsed = json.loads(result) | |
| cur.close() | |
| return parsed["row"], parsed["col"] | |
| def get_hrrr_data(conn, row: int, col: int, start_date: date, end_date: date, variable: str) -> pd.DataFrame: | |
| """Query HRRR hourly data across year-partitioned tables.""" | |
| start_year = start_date.year | |
| end_year = end_date.year | |
| start_int = int(start_date.strftime("%Y%m%d") + "00") | |
| end_int = int(end_date.strftime("%Y%m%d") + "23") | |
| parts = [] | |
| for year in range(start_year, end_year + 1): | |
| table = f"PROD_ENT_BYOD_WORKSPACE_DB.BYOD_NOAA.HRRR_DATA_HOURLY_{year}" | |
| yr_start = max(start_int, int(f"{year}010100")) | |
| yr_end = min(end_int, int(f"{year}123123")) | |
| parts.append(f""" | |
| SELECT "DATE_TIME_INT", "value" | |
| FROM {table} | |
| WHERE "row" = {row} | |
| AND "col" = {col} | |
| AND "variable" = '{variable}' | |
| AND "DATE_TIME_INT" BETWEEN {yr_start} AND {yr_end} | |
| """) | |
| sql = " UNION ALL ".join(parts) + " ORDER BY 1" | |
| cur = conn.cursor() | |
| cur.execute(sql) | |
| rows = cur.fetchall() | |
| cur.close() | |
| return pd.DataFrame(rows, columns=["DATE_TIME_INT", "value"]) | |
| def parse_hrrr_datetime(dt_int: int) -> pd.Timestamp: | |
| """Convert YYYYMMDDHH integer to Timestamp.""" | |
| s = str(dt_int) | |
| return pd.Timestamp(year=int(s[:4]), month=int(s[4:6]), day=int(s[6:8]), hour=int(s[8:10])) | |
| # --------------------------------------------------------------------------- | |
| # Routes | |
| # --------------------------------------------------------------------------- | |
| def index(): | |
| """Serve the frontend.""" | |
| return render_template("index.html") | |
| def api_geocode(): | |
| """Geocode an address. Returns {lat, lon}.""" | |
| address = request.args.get("address", "").strip() | |
| if not address: | |
| return jsonify({"error": "address parameter is required"}), 400 | |
| try: | |
| lat, lon = geocode(address) | |
| return jsonify({"lat": lat, "lon": lon}) | |
| except Exception as exc: | |
| return jsonify({"error": str(exc)}), 400 | |
| def api_weather(): | |
| """Fetch HRRR weather data. Returns hourly, daily, monthly, and KPIs.""" | |
| lat = request.args.get("lat", type=float) | |
| lon = request.args.get("lon", type=float) | |
| start = request.args.get("start", "") | |
| end = request.args.get("end", "") | |
| variable = request.args.get("variable", "Gust") | |
| if lat is None or lon is None or not start or not end: | |
| return jsonify({"error": "lat, lon, start, end are required"}), 400 | |
| if variable not in HRRR_VARIABLES: | |
| return jsonify({"error": f"Invalid variable. Choose from: {list(HRRR_VARIABLES.keys())}"}), 400 | |
| hrrr_var = HRRR_VARIABLES[variable] | |
| unit = HRRR_UNITS[variable] | |
| convert = CONVERTERS[hrrr_var] | |
| start_date = date.fromisoformat(start) | |
| end_date = date.fromisoformat(end) | |
| conn = get_snowflake_connection() | |
| try: | |
| row, col = get_row_col(conn, lon, lat) | |
| raw = get_hrrr_data(conn, row, col, start_date, end_date, hrrr_var) | |
| finally: | |
| conn.close() | |
| if raw.empty: | |
| return jsonify({"error": "No data found for the selected parameters"}), 404 | |
| df = raw.copy() | |
| df["time"] = df["DATE_TIME_INT"].apply(parse_hrrr_datetime) | |
| df["value_converted"] = df["value"].apply(convert) | |
| df = df.sort_values("time").reset_index(drop=True) | |
| hourly = [ | |
| {"time": r["time"].isoformat(), "value": round(r["value_converted"], 2)} | |
| for _, r in df.iterrows() | |
| ] | |
| df["date"] = df["time"].dt.date | |
| if variable == "Precipitation": | |
| daily = df.groupby("date").agg(total=("value_converted", "sum")).reset_index() | |
| daily_out = [ | |
| {"date": r["date"].isoformat(), "total": round(r["total"], 3)} | |
| for _, r in daily.iterrows() | |
| ] | |
| kpis = { | |
| "total": round(float(daily["total"].sum()), 2), | |
| "avg_daily": round(float(daily["total"].mean()), 3), | |
| "max_daily": round(float(daily["total"].max()), 3), | |
| "dry_days": int((daily["total"] < 0.01).sum()), | |
| } | |
| else: | |
| daily = df.groupby("date").agg( | |
| mean=("value_converted", "mean"), | |
| min=("value_converted", "min"), | |
| max=("value_converted", "max"), | |
| ).reset_index() | |
| daily_out = [ | |
| {"date": r["date"].isoformat(), "min": round(r["min"], 1), "mean": round(r["mean"], 1), "max": round(r["max"], 1)} | |
| for _, r in daily.iterrows() | |
| ] | |
| kpis = { | |
| "avg": round(float(daily["mean"].mean()), 1), | |
| "min": round(float(daily["min"].min()), 1), | |
| "max": round(float(daily["max"].max()), 1), | |
| "days": len(daily), | |
| } | |
| df["month"] = df["time"].dt.to_period("M").astype(str) | |
| if variable == "Precipitation": | |
| monthly = df.groupby("month")["value_converted"].sum().reset_index() | |
| else: | |
| monthly = df.groupby("month")["value_converted"].mean().reset_index() | |
| monthly.columns = ["month", "value"] | |
| monthly_out = [ | |
| {"month": r["month"], "value": round(r["value"], 2)} | |
| for _, r in monthly.iterrows() | |
| ] | |
| return jsonify({ | |
| "variable": variable, | |
| "unit": unit, | |
| "grid": {"row": row, "col": col}, | |
| "kpis": kpis, | |
| "hourly": hourly, | |
| "daily": daily_out, | |
| "monthly": monthly_out, | |
| }) | |
| def api_download(): | |
| """Download weather data as CSV.""" | |
| lat = request.args.get("lat", type=float) | |
| lon = request.args.get("lon", type=float) | |
| start = request.args.get("start", "") | |
| end = request.args.get("end", "") | |
| variable = request.args.get("variable", "Gust") | |
| agg = request.args.get("agg", "daily") | |
| if lat is None or lon is None or not start or not end: | |
| return jsonify({"error": "lat, lon, start, end are required"}), 400 | |
| hrrr_var = HRRR_VARIABLES.get(variable) | |
| if not hrrr_var: | |
| return jsonify({"error": "Invalid variable"}), 400 | |
| convert = CONVERTERS[hrrr_var] | |
| unit = HRRR_UNITS[variable] | |
| start_date = date.fromisoformat(start) | |
| end_date = date.fromisoformat(end) | |
| conn = get_snowflake_connection() | |
| try: | |
| row, col = get_row_col(conn, lon, lat) | |
| raw = get_hrrr_data(conn, row, col, start_date, end_date, hrrr_var) | |
| finally: | |
| conn.close() | |
| if raw.empty: | |
| return Response("No data", status=404) | |
| df = raw.copy() | |
| df["time"] = df["DATE_TIME_INT"].apply(parse_hrrr_datetime) | |
| df["value_converted"] = df["value"].apply(convert) | |
| df = df.sort_values("time").reset_index(drop=True) | |
| if agg == "hourly": | |
| out = df[["time", "value_converted"]].rename(columns={"value_converted": f"{variable} ({unit})"}) | |
| else: | |
| df["date"] = df["time"].dt.date | |
| if variable == "Precipitation": | |
| out = df.groupby("date").agg(total=("value_converted", "sum")).reset_index() | |
| out.columns = ["date", f"{variable} ({unit})"] | |
| else: | |
| out = df.groupby("date").agg( | |
| min=("value_converted", "min"), | |
| mean=("value_converted", "mean"), | |
| max=("value_converted", "max"), | |
| ).reset_index() | |
| out.columns = ["date", f"Min ({unit})", f"Mean ({unit})", f"Max ({unit})"] | |
| csv_str = out.to_csv(index=False) | |
| return Response( | |
| csv_str, | |
| mimetype="text/csv", | |
| headers={"Content-Disposition": f"attachment; filename=hrrr_{variable.lower()}_{start}_{end}.csv"}, | |
| ) | |
| if __name__ == "__main__": | |
| app.run(host="0.0.0.0", port=7860) | |