github-actions[bot] committed on
Commit ·
4f7ea6b
1
Parent(s): 697df06
Deploy from GitHub Actions
Browse files- analysis.py +194 -0
- app.py +2 -0
- pages.py +6 -1
- ui/__init__.py +0 -0
- ui/pages/__init__.py +0 -0
- ui/pages/dissolved_oxygen.py +31 -0
- utils/data_loading.py +2 -1
analysis.py
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
|
|
| 1 |
import sys
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
import altair as alt
|
| 5 |
import contextily as ctx
|
| 6 |
import geopandas as gpd
|
|
|
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
import numpy as np
|
| 9 |
import pandas as pd
|
|
@@ -1554,3 +1556,195 @@ def generate_seasonal_plot(data, year, shapefile_path):
|
|
| 1554 |
wbids=wbids,
|
| 1555 |
reporting_end_month=st.session_state.reporting_month,
|
| 1556 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
import sys
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
import altair as alt
|
| 6 |
import contextily as ctx
|
| 7 |
import geopandas as gpd
|
| 8 |
+
import matplotlib.dates as mdates
|
| 9 |
import matplotlib.pyplot as plt
|
| 10 |
import numpy as np
|
| 11 |
import pandas as pd
|
|
|
|
| 1556 |
wbids=wbids,
|
| 1557 |
reporting_end_month=st.session_state.reporting_month,
|
| 1558 |
)
|
| 1559 |
+
|
| 1560 |
+
|
| 1561 |
+
def plot_do_timeseries(
    df: pd.DataFrame,
    period: str = "Yearly",
    sector: str = "All",
    epa_thresh: float = 4.8,
) -> Figure:
    """
    Create a time series plot of dissolved oxygen levels for surface and bottom measurements.

    For every period the mean "Surface" and mean "Bottom" values are drawn as
    two markers joined by a thin vertical line, with a dashed horizontal line
    at ``epa_thresh``.

    Reference:
        https://www.hudsonriver.org/ccmp/soe/water-quality/do

    Parameters:
    -----------
    df : pd.DataFrame
        Dataframe containing dissolved oxygen measurements. The columns read
        are "Org_Analyte_Name", "Sample_Position", "Org_Result_Value" and,
        depending on ``period``, "Reporting_Year" or
        "Activity_Start_Date_Time".
    period : str
        'yearly' or 'monthly' aggregation period (case-insensitive). Any value
        other than 'yearly' is treated as monthly.
    sector : str
        Not used inside this function; kept so existing callers stay valid.
        NOTE(review): sector filtering appears to be done by the caller
        before ``df`` is passed in -- confirm.
    epa_thresh : float
        EPA threshold value for DO in mg/L

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot. When there is nothing to plot
        (no matching rows, or no non-missing means) the figure contains only
        the axis labels, title and a "no data" message.
    """
    period = period.lower()
    is_yearly = period == "yearly"

    # Filter for DO data at the two positions that are compared
    do_data = df[
        (df["Org_Analyte_Name"] == "Dissolved Oxygen")
        & (df["Sample_Position"].isin(["Surface", "Bottom"]))
    ].copy()

    # Create time grouping based on period
    if is_yearly:
        do_data["Period"] = do_data["Reporting_Year"]
    else:  # monthly
        do_data["Period"] = pd.to_datetime(
            do_data["Activity_Start_Date_Time"]
        ).dt.to_period("M")

    # Calculate means for each position and period (one column per position)
    means = (
        do_data.groupby(["Period", "Sample_Position"], observed=True)[
            "Org_Result_Value"
        ]
        .mean()
        .reset_index()
        .pivot(index="Period", columns="Sample_Position", values="Org_Result_Value")
    )
    # Guarantee both columns exist even when only one position was sampled;
    # the missing one becomes all-NaN instead of raising KeyError below.
    means.columns = means.columns.astype(str)
    means = means.reindex(columns=["Surface", "Bottom"])

    # Create figure and apply the styling that does not depend on the data
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.set_xlabel("Year" if is_yearly else "Month")
    ax.set_ylabel("Dissolved Oxygen (mg/L)")
    ax.set_title("Long-term Dissolved Oxygen Trends")
    ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")

    # Customize spines - only show bottom spine
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("black")
    ax.spines["bottom"].set_linewidth(0.5)

    # Nothing to draw: return a labelled placeholder rather than failing in
    # the min()/max()/int() calls further down.
    if means.empty or not means.notna().to_numpy().any():
        ax.text(
            0.5,
            0.5,
            "No dissolved oxygen data available",
            ha="center",
            va="center",
            transform=ax.transAxes,
        )
        fig.tight_layout()
        return fig

    # Convert Period index to proper format for plotting
    if is_yearly:
        x_values = np.array(means.index.astype(float))
    else:
        x_values = np.array(
            [pd.Period(idx).to_timestamp() for idx in means.index],
            dtype="datetime64[ns]",
        )

    # Plot connecting lines only (no markers); NaN endpoints draw nothing
    for x_val, bottom, surface in zip(x_values, means["Bottom"], means["Surface"]):
        ax.plot(
            [x_val, x_val],
            [bottom, surface],
            color="lightgray",
            linewidth=1,
            zorder=1,
            solid_capstyle="round",
        )

    # Calculate dynamic point size based on number of points
    n_points = len(x_values)
    base_size = 80  # Maximum point size
    min_size = 20  # Minimum point size

    # Exponential decay formula: size decreases as number of points increases
    point_size = max(
        min_size,
        base_size * math.exp(-0.0015 * n_points),
    )

    surface_scatter = ax.scatter(
        x_values,
        means["Surface"],
        color="#5AA4D8",
        s=point_size,
        zorder=2,
        label="Surface",
        edgecolors="white",
        linewidth=1,
        alpha=0.9,
    )
    bottom_scatter = ax.scatter(
        x_values,
        means["Bottom"],
        color="#1B4B8A",
        s=point_size,
        zorder=2,
        label="Bottom",
        edgecolors="white",
        linewidth=1,
        alpha=0.9,
    )

    # EPA threshold line
    threshold_line = ax.axhline(
        y=epa_thresh,
        color="#FF8C00",
        linestyle="--",
        alpha=0.9,
        linewidth=1,
        label=f"EPA threshold: {epa_thresh} mg/L",
        zorder=0,
    )

    # Customize legend
    ax.legend(
        handles=[surface_scatter, bottom_scatter, threshold_line],
        loc="upper right",
        frameon=False,
        ncol=1,  # Stack legend items vertically
        bbox_to_anchor=(1.0, 1.0),  # Position at top right
        handletextpad=0.5,  # Reduce space between handle and text
    )

    # Set y-axis limits with some padding. Both series and the threshold are
    # considered so that no marker or the threshold line is clipped, and the
    # NaN-aware reductions tolerate a position with no data.
    lowest = np.nanmin([means["Bottom"].min(), means["Surface"].min(), epa_thresh])
    highest = np.nanmax([means["Bottom"].max(), means["Surface"].max(), epa_thresh])
    ymin = max(int(lowest * 0.9) - 1, 0)
    ymax = highest * 1.1
    ax.set_ylim(ymin, ymax)
    ax.set_yticks(np.arange(ymin, ymax, 2))

    # Remove tick marks but keep labels
    ax.tick_params(axis="y", which="both", length=0)

    # Adjust x-axis ticks and limits
    if is_yearly:
        # For yearly data, ensure whole number ticks but month-based padding
        min_year = float(np.floor(x_values.min()))
        max_year = float(np.ceil(x_values.max()))

        # Set whole number ticks
        ax.set_xticks(np.arange(min_year, max_year + 1))

        # ~1/12 of a year for month padding
        ax.set_xlim(min_year - 0.083, max_year + 0.083)
    else:
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
        ax.xaxis.set_major_locator(mdates.YearLocator())
        # Configure this axes directly instead of pyplot's "current" axes
        ax.tick_params(axis="x", labelrotation=0)

        # Pad the range by one month on each side
        first_month = pd.Timestamp(x_values.min()) - pd.DateOffset(months=1)
        last_month = pd.Timestamp(x_values.max()) + pd.DateOffset(months=1)
        ax.set_xlim(first_month.to_pydatetime(), last_month.to_pydatetime())

    # Keep y-axis labels on the left of the gridlines
    ax.yaxis.tick_left()
    ax.yaxis.set_label_position("left")

    fig.tight_layout()
    return fig
|
app.py
CHANGED
|
@@ -5,6 +5,7 @@ from config import AppConfig
|
|
| 5 |
from dashboard_analytics import render_analytics_page
|
| 6 |
from pages import (
|
| 7 |
calendar_heatmaps_page,
|
|
|
|
| 8 |
do_temp_relationship_page,
|
| 9 |
home_page,
|
| 10 |
nutrient_ratios_page,
|
|
@@ -48,6 +49,7 @@ page_dict["Water Quality Portal"] = [
|
|
| 48 |
calendar_heatmaps_page,
|
| 49 |
seasonal_trends_page,
|
| 50 |
nutrient_ratios_page,
|
|
|
|
| 51 |
do_temp_relationship_page,
|
| 52 |
raw_data_page,
|
| 53 |
settings_page,
|
|
|
|
| 5 |
from dashboard_analytics import render_analytics_page
|
| 6 |
from pages import (
|
| 7 |
calendar_heatmaps_page,
|
| 8 |
+
dissolved_oxygen_page,
|
| 9 |
do_temp_relationship_page,
|
| 10 |
home_page,
|
| 11 |
nutrient_ratios_page,
|
|
|
|
| 49 |
calendar_heatmaps_page,
|
| 50 |
seasonal_trends_page,
|
| 51 |
nutrient_ratios_page,
|
| 52 |
+
dissolved_oxygen_page,
|
| 53 |
do_temp_relationship_page,
|
| 54 |
raw_data_page,
|
| 55 |
settings_page,
|
pages.py
CHANGED
|
@@ -551,7 +551,7 @@ def seasonal_trends_section():
|
|
| 551 |
# Move filters to sidebar
|
| 552 |
st.sidebar.markdown("### Filter Options")
|
| 553 |
analyte = st.sidebar.selectbox(
|
| 554 |
-
"Select
|
| 555 |
)
|
| 556 |
selected_year = st.sidebar.selectbox(
|
| 557 |
"Select Year:", sorted(years, reverse=True), index=0, key="seasonal_year_select"
|
|
@@ -906,3 +906,8 @@ nutrient_ratios_page = st.Page(
|
|
| 906 |
icon=":material/scatter_plot:",
|
| 907 |
)
|
| 908 |
settings_page = st.Page("settings.py", title="Settings", icon=":material/settings:")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
# Move filters to sidebar
|
| 552 |
st.sidebar.markdown("### Filter Options")
|
| 553 |
analyte = st.sidebar.selectbox(
|
| 554 |
+
"Select Parameter:", ["Salinity"], index=0, key="seasonal_analyte_select"
|
| 555 |
)
|
| 556 |
selected_year = st.sidebar.selectbox(
|
| 557 |
"Select Year:", sorted(years, reverse=True), index=0, key="seasonal_year_select"
|
|
|
|
| 906 |
icon=":material/scatter_plot:",
|
| 907 |
)
|
| 908 |
settings_page = st.Page("settings.py", title="Settings", icon=":material/settings:")
|
| 909 |
+
dissolved_oxygen_page = st.Page(
|
| 910 |
+
"ui/pages/dissolved_oxygen.py",
|
| 911 |
+
title="Dissolved Oxygen",
|
| 912 |
+
icon=":material/water_drop:",
|
| 913 |
+
)
|
ui/__init__.py
ADDED
|
File without changes
|
ui/pages/__init__.py
ADDED
|
File without changes
|
ui/pages/dissolved_oxygen.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
from analysis import plot_do_timeseries
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def dissolved_oxygen():
    """
    Render the sidebar controls for the dissolved oxygen page and build its figure.

    Reads the raw dataframe from ``st.session_state.data["raw_df"]``, lets the
    user pick an aggregation period and a sector in the sidebar, filters the
    dataframe to the chosen sector (no filtering for "All") and passes the
    result to ``plot_do_timeseries``.

    Returns:
        The figure returned by ``plot_do_timeseries``.
    """
    # Look the dataframe up once instead of on every use
    raw_df = st.session_state.data["raw_df"]

    # Add period selector to sidebar
    period = st.sidebar.radio(
        "Time Period", options=["Monthly", "Yearly"], key="do_period"
    )

    # Drop missing sectors so no NaN/None entry shows up in the dropdown, and
    # sort (by string form, to tolerate mixed types) for a stable order.
    sector_names = sorted(raw_df["Sector"].dropna().unique(), key=str)
    sector_options = ["All"] + list(sector_names)
    sector = st.sidebar.selectbox("Sector", options=sector_options, key="do_sector")

    if sector == "All":
        filtered_df = raw_df
    else:
        filtered_df = raw_df[raw_df["Sector"] == sector]

    # Forward the selection so the plot function knows which sector it shows
    return plot_do_timeseries(filtered_df, period=period, sector=sector)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Page body: show a short disclaimer about the chart's origin and threshold,
# then build the figure from the sidebar selections and render it.
page_note = """
    This is a half-baked attempt to replicate the Hudson River Estuary Program's excellent
    [Long Term Findings chart for dissolved oxygen](https://www.hudsonriver.org/ccmp/soe/water-quality/do).
    The EPA threshold is copied from the reference image and not specific to the water quality standards for our area.
    """
st.info(page_note)

fig = dissolved_oxygen()
st.pyplot(fig)
|
utils/data_loading.py
CHANGED
|
@@ -114,7 +114,7 @@ class DataManager:
|
|
| 114 |
return self._get_empty_data_structure()
|
| 115 |
|
| 116 |
|
| 117 |
-
@timer(include_params=
|
| 118 |
def get_raw_data(file_path: str) -> pd.DataFrame:
|
| 119 |
"""Load raw data from parquet file"""
|
| 120 |
return pd.read_parquet(file_path)
|
|
@@ -200,6 +200,7 @@ def add_lat_long(raw_df: pd.DataFrame, stations_df: pd.DataFrame) -> pd.DataFram
|
|
| 200 |
return raw_df.drop("Number", axis=1)
|
| 201 |
|
| 202 |
|
|
|
|
| 203 |
def get_stations_data() -> pd.DataFrame:
|
| 204 |
"""
|
| 205 |
Return stations data as a dataframe with the most recent and earliest sample dates for each station.
|
|
|
|
| 114 |
return self._get_empty_data_structure()
|
| 115 |
|
| 116 |
|
| 117 |
+
@timer(include_params=True)
def get_raw_data(file_path: str) -> pd.DataFrame:
    """
    Load raw data from parquet file.

    Args:
        file_path: Location of the parquet file, passed straight to
            ``pd.read_parquet``.

    Returns:
        The dataframe produced by ``pd.read_parquet``.
    """
    raw_frame = pd.read_parquet(file_path)
    return raw_frame
|
|
|
|
| 200 |
return raw_df.drop("Number", axis=1)
|
| 201 |
|
| 202 |
|
| 203 |
+
@timer(include_params=False)
|
| 204 |
def get_stations_data() -> pd.DataFrame:
|
| 205 |
"""
|
| 206 |
Return stations data as a dataframe with the most recent and earliest sample dates for each station.
|