flask_test / app.py
mattritchey's picture
Upload 5 files
59a777c verified
"""Flask backend for HRRR Weather Dashboard - HF Spaces deployment."""
import json
import os
from datetime import date
from functools import lru_cache
import pandas as pd
import snowflake.connector
from flask import Flask, jsonify, render_template, request, Response
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
app = Flask(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
HRRR_VARIABLES = {
"Temperature": "TMP",
"Wind": "WIND",
"Gust": "GUST",
"Precipitation": "APCP",
}
HRRR_UNITS = {
"Temperature": "F",
"Wind": "mph",
"Gust": "mph",
"Precipitation": "in",
}
C_TO_F = lambda c: c * 9 / 5 + 32
MS_TO_MPH = lambda ms: ms * 2.23694
MM_TO_IN = lambda mm: mm / 25.4
CONVERTERS = {
"TMP": C_TO_F,
"WIND": MS_TO_MPH,
"GUST": MS_TO_MPH,
"APCP": MM_TO_IN,
}
# ---------------------------------------------------------------------------
# Snowflake connection (use env vars / HF Secrets)
# ---------------------------------------------------------------------------
SF_ACCOUNT = os.environ.get("SF_ACCOUNT", "")
SF_USER = os.environ.get("SF_USER", "")
SF_PASSWORD = os.environ.get("SF_PASSWORD", "")
SF_DATABASE = os.environ.get("SF_DATABASE", "PROD_ENT_BYOD_WORKSPACE_DB")
SF_SCHEMA = os.environ.get("SF_SCHEMA", "BYOD_NOAA")
SF_WAREHOUSE = os.environ.get("SF_WAREHOUSE", "")
SF_ROLE = os.environ.get("SF_ROLE", "")
def get_snowflake_connection():
"""Create a Snowflake connection using password auth (for containers)."""
params = {
"account": SF_ACCOUNT,
"user": SF_USER,
"password": SF_PASSWORD,
"database": SF_DATABASE,
"schema": SF_SCHEMA,
}
if SF_WAREHOUSE:
params["warehouse"] = SF_WAREHOUSE
if SF_ROLE:
params["role"] = SF_ROLE
return snowflake.connector.connect(**params)
# ---------------------------------------------------------------------------
# Data helpers
# ---------------------------------------------------------------------------
@lru_cache(maxsize=128)
def geocode(address: str) -> tuple:
"""Return (lat, lon) for an address."""
try:
address2 = address.replace(" ", "+").replace(",", "%2C")
url = (
"https://geocoding.geo.census.gov/geocoder/locations/onelineaddress"
f"?address={address2}&benchmark=2020&format=json"
)
df = pd.read_json(url)
coords = df.iloc[:1, 0][0][0]["coordinates"]
return (coords["y"], coords["x"])
except Exception:
geolocator = Nominatim(user_agent="HRRRWeatherDashboard")
geocode_fn = RateLimiter(geolocator.geocode, min_delay_seconds=1)
location = geocode_fn(address)
if location is None:
raise ValueError(f"Could not geocode: {address}")
return (location.latitude, location.longitude)
def get_row_col(conn, lon: float, lat: float) -> tuple:
"""Call Snowflake UDF to convert lat/lon to HRRR grid row/col."""
cur = conn.cursor()
cur.execute(
f"SELECT PROD_ENT_BYOD_WORKSPACE_DB.BYOD_NOAA.LAT_LON_TO_ROW_COL_HRRR({lon}, {lat}) AS result"
)
result = cur.fetchone()[0]
parsed = json.loads(result)
cur.close()
return parsed["row"], parsed["col"]
def get_hrrr_data(conn, row: int, col: int, start_date: date, end_date: date, variable: str) -> pd.DataFrame:
"""Query HRRR hourly data across year-partitioned tables."""
start_year = start_date.year
end_year = end_date.year
start_int = int(start_date.strftime("%Y%m%d") + "00")
end_int = int(end_date.strftime("%Y%m%d") + "23")
parts = []
for year in range(start_year, end_year + 1):
table = f"PROD_ENT_BYOD_WORKSPACE_DB.BYOD_NOAA.HRRR_DATA_HOURLY_{year}"
yr_start = max(start_int, int(f"{year}010100"))
yr_end = min(end_int, int(f"{year}123123"))
parts.append(f"""
SELECT "DATE_TIME_INT", "value"
FROM {table}
WHERE "row" = {row}
AND "col" = {col}
AND "variable" = '{variable}'
AND "DATE_TIME_INT" BETWEEN {yr_start} AND {yr_end}
""")
sql = " UNION ALL ".join(parts) + " ORDER BY 1"
cur = conn.cursor()
cur.execute(sql)
rows = cur.fetchall()
cur.close()
return pd.DataFrame(rows, columns=["DATE_TIME_INT", "value"])
def parse_hrrr_datetime(dt_int: int) -> pd.Timestamp:
"""Convert YYYYMMDDHH integer to Timestamp."""
s = str(dt_int)
return pd.Timestamp(year=int(s[:4]), month=int(s[4:6]), day=int(s[6:8]), hour=int(s[8:10]))
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@app.route("/")
def index():
"""Serve the frontend."""
return render_template("index.html")
@app.route("/api/geocode")
def api_geocode():
"""Geocode an address. Returns {lat, lon}."""
address = request.args.get("address", "").strip()
if not address:
return jsonify({"error": "address parameter is required"}), 400
try:
lat, lon = geocode(address)
return jsonify({"lat": lat, "lon": lon})
except Exception as exc:
return jsonify({"error": str(exc)}), 400
@app.route("/api/weather")
def api_weather():
"""Fetch HRRR weather data. Returns hourly, daily, monthly, and KPIs."""
lat = request.args.get("lat", type=float)
lon = request.args.get("lon", type=float)
start = request.args.get("start", "")
end = request.args.get("end", "")
variable = request.args.get("variable", "Gust")
if lat is None or lon is None or not start or not end:
return jsonify({"error": "lat, lon, start, end are required"}), 400
if variable not in HRRR_VARIABLES:
return jsonify({"error": f"Invalid variable. Choose from: {list(HRRR_VARIABLES.keys())}"}), 400
hrrr_var = HRRR_VARIABLES[variable]
unit = HRRR_UNITS[variable]
convert = CONVERTERS[hrrr_var]
start_date = date.fromisoformat(start)
end_date = date.fromisoformat(end)
conn = get_snowflake_connection()
try:
row, col = get_row_col(conn, lon, lat)
raw = get_hrrr_data(conn, row, col, start_date, end_date, hrrr_var)
finally:
conn.close()
if raw.empty:
return jsonify({"error": "No data found for the selected parameters"}), 404
df = raw.copy()
df["time"] = df["DATE_TIME_INT"].apply(parse_hrrr_datetime)
df["value_converted"] = df["value"].apply(convert)
df = df.sort_values("time").reset_index(drop=True)
hourly = [
{"time": r["time"].isoformat(), "value": round(r["value_converted"], 2)}
for _, r in df.iterrows()
]
df["date"] = df["time"].dt.date
if variable == "Precipitation":
daily = df.groupby("date").agg(total=("value_converted", "sum")).reset_index()
daily_out = [
{"date": r["date"].isoformat(), "total": round(r["total"], 3)}
for _, r in daily.iterrows()
]
kpis = {
"total": round(float(daily["total"].sum()), 2),
"avg_daily": round(float(daily["total"].mean()), 3),
"max_daily": round(float(daily["total"].max()), 3),
"dry_days": int((daily["total"] < 0.01).sum()),
}
else:
daily = df.groupby("date").agg(
mean=("value_converted", "mean"),
min=("value_converted", "min"),
max=("value_converted", "max"),
).reset_index()
daily_out = [
{"date": r["date"].isoformat(), "min": round(r["min"], 1), "mean": round(r["mean"], 1), "max": round(r["max"], 1)}
for _, r in daily.iterrows()
]
kpis = {
"avg": round(float(daily["mean"].mean()), 1),
"min": round(float(daily["min"].min()), 1),
"max": round(float(daily["max"].max()), 1),
"days": len(daily),
}
df["month"] = df["time"].dt.to_period("M").astype(str)
if variable == "Precipitation":
monthly = df.groupby("month")["value_converted"].sum().reset_index()
else:
monthly = df.groupby("month")["value_converted"].mean().reset_index()
monthly.columns = ["month", "value"]
monthly_out = [
{"month": r["month"], "value": round(r["value"], 2)}
for _, r in monthly.iterrows()
]
return jsonify({
"variable": variable,
"unit": unit,
"grid": {"row": row, "col": col},
"kpis": kpis,
"hourly": hourly,
"daily": daily_out,
"monthly": monthly_out,
})
@app.route("/api/download")
def api_download():
"""Download weather data as CSV."""
lat = request.args.get("lat", type=float)
lon = request.args.get("lon", type=float)
start = request.args.get("start", "")
end = request.args.get("end", "")
variable = request.args.get("variable", "Gust")
agg = request.args.get("agg", "daily")
if lat is None or lon is None or not start or not end:
return jsonify({"error": "lat, lon, start, end are required"}), 400
hrrr_var = HRRR_VARIABLES.get(variable)
if not hrrr_var:
return jsonify({"error": "Invalid variable"}), 400
convert = CONVERTERS[hrrr_var]
unit = HRRR_UNITS[variable]
start_date = date.fromisoformat(start)
end_date = date.fromisoformat(end)
conn = get_snowflake_connection()
try:
row, col = get_row_col(conn, lon, lat)
raw = get_hrrr_data(conn, row, col, start_date, end_date, hrrr_var)
finally:
conn.close()
if raw.empty:
return Response("No data", status=404)
df = raw.copy()
df["time"] = df["DATE_TIME_INT"].apply(parse_hrrr_datetime)
df["value_converted"] = df["value"].apply(convert)
df = df.sort_values("time").reset_index(drop=True)
if agg == "hourly":
out = df[["time", "value_converted"]].rename(columns={"value_converted": f"{variable} ({unit})"})
else:
df["date"] = df["time"].dt.date
if variable == "Precipitation":
out = df.groupby("date").agg(total=("value_converted", "sum")).reset_index()
out.columns = ["date", f"{variable} ({unit})"]
else:
out = df.groupby("date").agg(
min=("value_converted", "min"),
mean=("value_converted", "mean"),
max=("value_converted", "max"),
).reset_index()
out.columns = ["date", f"Min ({unit})", f"Mean ({unit})", f"Max ({unit})"]
csv_str = out.to_csv(index=False)
return Response(
csv_str,
mimetype="text/csv",
headers={"Content-Disposition": f"attachment; filename=hrrr_{variable.lower()}_{start}_{end}.csv"},
)
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)