"""Flask backend for HRRR Weather Dashboard - HF Spaces deployment.""" import json import os from datetime import date from functools import lru_cache import pandas as pd import snowflake.connector from flask import Flask, jsonify, render_template, request, Response from geopy.extra.rate_limiter import RateLimiter from geopy.geocoders import Nominatim app = Flask(__name__) # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- HRRR_VARIABLES = { "Temperature": "TMP", "Wind": "WIND", "Gust": "GUST", "Precipitation": "APCP", } HRRR_UNITS = { "Temperature": "F", "Wind": "mph", "Gust": "mph", "Precipitation": "in", } C_TO_F = lambda c: c * 9 / 5 + 32 MS_TO_MPH = lambda ms: ms * 2.23694 MM_TO_IN = lambda mm: mm / 25.4 CONVERTERS = { "TMP": C_TO_F, "WIND": MS_TO_MPH, "GUST": MS_TO_MPH, "APCP": MM_TO_IN, } # --------------------------------------------------------------------------- # Snowflake connection (use env vars / HF Secrets) # --------------------------------------------------------------------------- SF_ACCOUNT = os.environ.get("SF_ACCOUNT", "") SF_USER = os.environ.get("SF_USER", "") SF_PASSWORD = os.environ.get("SF_PASSWORD", "") SF_DATABASE = os.environ.get("SF_DATABASE", "PROD_ENT_BYOD_WORKSPACE_DB") SF_SCHEMA = os.environ.get("SF_SCHEMA", "BYOD_NOAA") SF_WAREHOUSE = os.environ.get("SF_WAREHOUSE", "") SF_ROLE = os.environ.get("SF_ROLE", "") def get_snowflake_connection(): """Create a Snowflake connection using password auth (for containers).""" params = { "account": SF_ACCOUNT, "user": SF_USER, "password": SF_PASSWORD, "database": SF_DATABASE, "schema": SF_SCHEMA, } if SF_WAREHOUSE: params["warehouse"] = SF_WAREHOUSE if SF_ROLE: params["role"] = SF_ROLE return snowflake.connector.connect(**params) # --------------------------------------------------------------------------- # Data helpers # --------------------------------------------------------------------------- @lru_cache(maxsize=128) def geocode(address: str) -> tuple: """Return (lat, lon) for an address.""" try: address2 = address.replace(" ", "+").replace(",", "%2C") url = ( "https://geocoding.geo.census.gov/geocoder/locations/onelineaddress" f"?address={address2}&benchmark=2020&format=json" ) df = pd.read_json(url) coords = df.iloc[:1, 0][0][0]["coordinates"] return (coords["y"], coords["x"]) except Exception: geolocator = Nominatim(user_agent="HRRRWeatherDashboard") geocode_fn = RateLimiter(geolocator.geocode, min_delay_seconds=1) location = geocode_fn(address) if location is None: raise ValueError(f"Could not geocode: {address}") return (location.latitude, location.longitude) def get_row_col(conn, lon: float, lat: float) -> tuple: """Call Snowflake UDF to convert lat/lon to HRRR grid row/col.""" cur = conn.cursor() cur.execute( f"SELECT PROD_ENT_BYOD_WORKSPACE_DB.BYOD_NOAA.LAT_LON_TO_ROW_COL_HRRR({lon}, {lat}) AS result" ) result = cur.fetchone()[0] parsed = json.loads(result) cur.close() return parsed["row"], parsed["col"] def get_hrrr_data(conn, row: int, col: int, start_date: date, end_date: date, variable: str) -> pd.DataFrame: """Query HRRR hourly data across year-partitioned tables.""" start_year = start_date.year end_year = end_date.year start_int = int(start_date.strftime("%Y%m%d") + "00") end_int = int(end_date.strftime("%Y%m%d") + "23") parts = [] for year in range(start_year, end_year + 1): table = f"PROD_ENT_BYOD_WORKSPACE_DB.BYOD_NOAA.HRRR_DATA_HOURLY_{year}" yr_start = max(start_int, int(f"{year}010100")) yr_end = min(end_int, int(f"{year}123123")) parts.append(f""" SELECT "DATE_TIME_INT", "value" FROM {table} WHERE "row" = {row} AND "col" = {col} AND "variable" = '{variable}' AND "DATE_TIME_INT" BETWEEN {yr_start} AND {yr_end} """) sql = " UNION ALL ".join(parts) + " ORDER BY 1" cur = conn.cursor() cur.execute(sql) rows = cur.fetchall() cur.close() return pd.DataFrame(rows, columns=["DATE_TIME_INT", "value"]) def parse_hrrr_datetime(dt_int: int) -> pd.Timestamp: """Convert YYYYMMDDHH integer to Timestamp.""" s = str(dt_int) return pd.Timestamp(year=int(s[:4]), month=int(s[4:6]), day=int(s[6:8]), hour=int(s[8:10])) # --------------------------------------------------------------------------- # Routes # --------------------------------------------------------------------------- @app.route("/") def index(): """Serve the frontend.""" return render_template("index.html") @app.route("/api/geocode") def api_geocode(): """Geocode an address. Returns {lat, lon}.""" address = request.args.get("address", "").strip() if not address: return jsonify({"error": "address parameter is required"}), 400 try: lat, lon = geocode(address) return jsonify({"lat": lat, "lon": lon}) except Exception as exc: return jsonify({"error": str(exc)}), 400 @app.route("/api/weather") def api_weather(): """Fetch HRRR weather data. Returns hourly, daily, monthly, and KPIs.""" lat = request.args.get("lat", type=float) lon = request.args.get("lon", type=float) start = request.args.get("start", "") end = request.args.get("end", "") variable = request.args.get("variable", "Gust") if lat is None or lon is None or not start or not end: return jsonify({"error": "lat, lon, start, end are required"}), 400 if variable not in HRRR_VARIABLES: return jsonify({"error": f"Invalid variable. Choose from: {list(HRRR_VARIABLES.keys())}"}), 400 hrrr_var = HRRR_VARIABLES[variable] unit = HRRR_UNITS[variable] convert = CONVERTERS[hrrr_var] start_date = date.fromisoformat(start) end_date = date.fromisoformat(end) conn = get_snowflake_connection() try: row, col = get_row_col(conn, lon, lat) raw = get_hrrr_data(conn, row, col, start_date, end_date, hrrr_var) finally: conn.close() if raw.empty: return jsonify({"error": "No data found for the selected parameters"}), 404 df = raw.copy() df["time"] = df["DATE_TIME_INT"].apply(parse_hrrr_datetime) df["value_converted"] = df["value"].apply(convert) df = df.sort_values("time").reset_index(drop=True) hourly = [ {"time": r["time"].isoformat(), "value": round(r["value_converted"], 2)} for _, r in df.iterrows() ] df["date"] = df["time"].dt.date if variable == "Precipitation": daily = df.groupby("date").agg(total=("value_converted", "sum")).reset_index() daily_out = [ {"date": r["date"].isoformat(), "total": round(r["total"], 3)} for _, r in daily.iterrows() ] kpis = { "total": round(float(daily["total"].sum()), 2), "avg_daily": round(float(daily["total"].mean()), 3), "max_daily": round(float(daily["total"].max()), 3), "dry_days": int((daily["total"] < 0.01).sum()), } else: daily = df.groupby("date").agg( mean=("value_converted", "mean"), min=("value_converted", "min"), max=("value_converted", "max"), ).reset_index() daily_out = [ {"date": r["date"].isoformat(), "min": round(r["min"], 1), "mean": round(r["mean"], 1), "max": round(r["max"], 1)} for _, r in daily.iterrows() ] kpis = { "avg": round(float(daily["mean"].mean()), 1), "min": round(float(daily["min"].min()), 1), "max": round(float(daily["max"].max()), 1), "days": len(daily), } df["month"] = df["time"].dt.to_period("M").astype(str) if variable == "Precipitation": monthly = df.groupby("month")["value_converted"].sum().reset_index() else: monthly = df.groupby("month")["value_converted"].mean().reset_index() monthly.columns = ["month", "value"] monthly_out = [ {"month": r["month"], "value": round(r["value"], 2)} for _, r in monthly.iterrows() ] return jsonify({ "variable": variable, "unit": unit, "grid": {"row": row, "col": col}, "kpis": kpis, "hourly": hourly, "daily": daily_out, "monthly": monthly_out, }) @app.route("/api/download") def api_download(): """Download weather data as CSV.""" lat = request.args.get("lat", type=float) lon = request.args.get("lon", type=float) start = request.args.get("start", "") end = request.args.get("end", "") variable = request.args.get("variable", "Gust") agg = request.args.get("agg", "daily") if lat is None or lon is None or not start or not end: return jsonify({"error": "lat, lon, start, end are required"}), 400 hrrr_var = HRRR_VARIABLES.get(variable) if not hrrr_var: return jsonify({"error": "Invalid variable"}), 400 convert = CONVERTERS[hrrr_var] unit = HRRR_UNITS[variable] start_date = date.fromisoformat(start) end_date = date.fromisoformat(end) conn = get_snowflake_connection() try: row, col = get_row_col(conn, lon, lat) raw = get_hrrr_data(conn, row, col, start_date, end_date, hrrr_var) finally: conn.close() if raw.empty: return Response("No data", status=404) df = raw.copy() df["time"] = df["DATE_TIME_INT"].apply(parse_hrrr_datetime) df["value_converted"] = df["value"].apply(convert) df = df.sort_values("time").reset_index(drop=True) if agg == "hourly": out = df[["time", "value_converted"]].rename(columns={"value_converted": f"{variable} ({unit})"}) else: df["date"] = df["time"].dt.date if variable == "Precipitation": out = df.groupby("date").agg(total=("value_converted", "sum")).reset_index() out.columns = ["date", f"{variable} ({unit})"] else: out = df.groupby("date").agg( min=("value_converted", "min"), mean=("value_converted", "mean"), max=("value_converted", "max"), ).reset_index() out.columns = ["date", f"Min ({unit})", f"Mean ({unit})", f"Max ({unit})"] csv_str = out.to_csv(index=False) return Response( csv_str, mimetype="text/csv", headers={"Content-Disposition": f"attachment; filename=hrrr_{variable.lower()}_{start}_{end}.csv"}, ) if __name__ == "__main__": app.run(host="0.0.0.0", port=7860)