github-actions[bot] committed on
Commit
4f7ea6b
·
1 Parent(s): 697df06

Deploy from GitHub Actions

Browse files
analysis.py CHANGED
@@ -1,9 +1,11 @@
 
1
  import sys
2
  from pathlib import Path
3
 
4
  import altair as alt
5
  import contextily as ctx
6
  import geopandas as gpd
 
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
@@ -1554,3 +1556,195 @@ def generate_seasonal_plot(data, year, shapefile_path):
1554
  wbids=wbids,
1555
  reporting_end_month=st.session_state.reporting_month,
1556
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
  import sys
3
  from pathlib import Path
4
 
5
  import altair as alt
6
  import contextily as ctx
7
  import geopandas as gpd
8
+ import matplotlib.dates as mdates
9
  import matplotlib.pyplot as plt
10
  import numpy as np
11
  import pandas as pd
 
1556
  wbids=wbids,
1557
  reporting_end_month=st.session_state.reporting_month,
1558
  )
1559
+
1560
+
1561
def _mean_do_by_period(df: pd.DataFrame, period: str) -> pd.DataFrame:
    """Return mean dissolved oxygen per period with ``Surface`` and ``Bottom`` columns."""
    do_data = df[
        (df["Org_Analyte_Name"] == "Dissolved Oxygen")
        & (df["Sample_Position"].isin(["Surface", "Bottom"]))
    ].copy()
    # Plain strings keep the pivoted column index a regular Index even when
    # the source column is categorical, so the reindex below is predictable.
    do_data["Sample_Position"] = do_data["Sample_Position"].astype(str)

    if period == "yearly":
        do_data["Period"] = do_data["Reporting_Year"]
    else:  # monthly
        do_data["Period"] = pd.to_datetime(
            do_data["Activity_Start_Date_Time"]
        ).dt.to_period("M")

    means = (
        do_data.groupby(["Period", "Sample_Position"], observed=True)[
            "Org_Result_Value"
        ]
        .mean()
        .reset_index()
        .pivot(index="Period", columns="Sample_Position", values="Org_Result_Value")
    )
    # Guarantee both columns exist (NaN-filled when a position was never
    # sampled) and drop periods that carry no value at all.
    return means.reindex(columns=["Surface", "Bottom"]).dropna(how="all")


def plot_do_timeseries(
    df: pd.DataFrame,
    period: str = "Yearly",
    sector: str = "All",
    epa_thresh: float = 4.8,
) -> Figure:
    """
    Create a time series plot of dissolved oxygen levels for surface and bottom measurements.

    Reference:
    https://www.hudsonriver.org/ccmp/soe/water-quality/do

    Parameters:
    -----------
    df : pd.DataFrame
        Dataframe containing dissolved oxygen measurements. The columns
        ``Org_Analyte_Name``, ``Sample_Position``, ``Org_Result_Value`` and
        either ``Reporting_Year`` (yearly) or ``Activity_Start_Date_Time``
        (monthly) are read.
    period : str
        Aggregation period, compared case-insensitively. ``"yearly"`` groups
        by ``Reporting_Year``; any other value is treated as monthly.
    sector : str
        Accepted for interface compatibility; it is not used for filtering
        or labelling. Pass a dataframe that is already restricted to the
        sector of interest.
    epa_thresh : float
        EPA threshold value for DO in mg/L, drawn as a horizontal line.

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot. When no matching measurements
        exist, the figure contains a "no data" message instead of raising.
    """
    period = period.lower()
    means = _mean_do_by_period(df, period)

    fig, ax = plt.subplots(figsize=(15, 8))

    # Labels, grid and spines do not depend on the data, so set them first;
    # the empty-data figure then shares the same look.
    ax.set_xlabel("Year" if period == "yearly" else "Month")
    ax.set_ylabel("Dissolved Oxygen (mg/L)")
    ax.set_title("Long-term Dissolved Oxygen Trends")
    ax.grid(True, axis="y", alpha=0.15, linestyle="-", color="gray")

    # Only the bottom spine is shown
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["bottom"].set_color("black")
    ax.spines["bottom"].set_linewidth(0.5)

    if means.empty:
        # Nothing to aggregate (e.g. a filter that matches no DO samples):
        # return a readable placeholder rather than failing on min()/max().
        ax.text(
            0.5,
            0.5,
            "No dissolved oxygen data available",
            ha="center",
            va="center",
            transform=ax.transAxes,
        )
        fig.tight_layout()
        return fig

    # Convert the period index into plottable x coordinates
    if period == "yearly":
        x_values = np.asarray(means.index.astype(float))
    else:
        x_values = pd.PeriodIndex(means.index).to_timestamp().to_numpy()

    # Light vertical connectors between the bottom and surface mean of each
    # period; a NaN endpoint simply produces no visible segment.
    for x_val, bottom, surface in zip(x_values, means["Bottom"], means["Surface"]):
        ax.plot(
            [x_val, x_val],
            [bottom, surface],
            color="lightgray",
            linewidth=1,
            zorder=1,
            solid_capstyle="round",
        )

    # Marker size decays exponentially with the number of periods so dense
    # (monthly) series stay legible, bounded below by min_size.
    base_size = 80  # Maximum point size
    min_size = 20  # Minimum point size
    point_size = max(min_size, base_size * math.exp(-0.0015 * len(x_values)))

    surface_scatter = ax.scatter(
        x_values,
        means["Surface"],
        color="#5AA4D8",
        s=point_size,
        zorder=2,
        label="Surface",
        edgecolors="white",
        linewidth=1,
        alpha=0.9,
    )
    bottom_scatter = ax.scatter(
        x_values,
        means["Bottom"],
        color="#1B4B8A",
        s=point_size,
        zorder=2,
        label="Bottom",
        edgecolors="white",
        linewidth=1,
        alpha=0.9,
    )

    threshold_line = ax.axhline(
        y=epa_thresh,
        color="#FF8C00",
        linestyle="--",
        alpha=0.9,
        linewidth=1,
        label=f"EPA threshold: {epa_thresh} mg/L",
        zorder=0,
    )

    ax.legend(
        handles=[surface_scatter, bottom_scatter, threshold_line],
        loc="upper right",
        frameon=False,
        ncol=1,  # Stack legend items vertically
        bbox_to_anchor=(1.0, 1.0),  # Position at top right
        handletextpad=0.5,  # Reduce space between handle and text
    )

    # Y limits cover both positions and the threshold line. NaN-aware
    # reductions are required because one position may be missing entirely.
    observed = means[["Surface", "Bottom"]].to_numpy(dtype=float)
    lowest = min(float(np.nanmin(observed)), epa_thresh)
    highest = max(float(np.nanmax(observed)), epa_thresh)
    ymin = max(int(lowest * 0.9) - 1, 0)
    ymax = highest * 1.1
    ax.set_ylim(ymin, ymax)
    ax.set_yticks(np.arange(ymin, ymax, 2))

    # Remove y tick marks but keep labels
    ax.tick_params(axis="y", which="both", length=0)

    if period == "monthly":
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
        ax.xaxis.set_major_locator(mdates.YearLocator())
        ax.tick_params(axis="x", labelrotation=0)

        # Pad the x range by one month on each side
        first = pd.Timestamp(x_values.min()) - pd.DateOffset(months=1)
        last = pd.Timestamp(x_values.max()) + pd.DateOffset(months=1)
        ax.set_xlim(first, last)
    else:
        # Whole-number year ticks with roughly one month of padding
        min_year = float(np.floor(x_values.min()))
        max_year = float(np.ceil(x_values.max()))
        ax.set_xticks(np.arange(min_year, max_year + 1))
        ax.set_xlim(min_year - 0.083, max_year + 0.083)  # ~1/12 of a year

    # Keep y-axis labels on the left of the gridlines
    ax.yaxis.tick_left()
    ax.yaxis.set_label_position("left")

    fig.tight_layout()
    return fig
app.py CHANGED
@@ -5,6 +5,7 @@ from config import AppConfig
5
  from dashboard_analytics import render_analytics_page
6
  from pages import (
7
  calendar_heatmaps_page,
 
8
  do_temp_relationship_page,
9
  home_page,
10
  nutrient_ratios_page,
@@ -48,6 +49,7 @@ page_dict["Water Quality Portal"] = [
48
  calendar_heatmaps_page,
49
  seasonal_trends_page,
50
  nutrient_ratios_page,
 
51
  do_temp_relationship_page,
52
  raw_data_page,
53
  settings_page,
 
5
  from dashboard_analytics import render_analytics_page
6
  from pages import (
7
  calendar_heatmaps_page,
8
+ dissolved_oxygen_page,
9
  do_temp_relationship_page,
10
  home_page,
11
  nutrient_ratios_page,
 
49
  calendar_heatmaps_page,
50
  seasonal_trends_page,
51
  nutrient_ratios_page,
52
+ dissolved_oxygen_page,
53
  do_temp_relationship_page,
54
  raw_data_page,
55
  settings_page,
pages.py CHANGED
@@ -551,7 +551,7 @@ def seasonal_trends_section():
551
  # Move filters to sidebar
552
  st.sidebar.markdown("### Filter Options")
553
  analyte = st.sidebar.selectbox(
554
- "Select Analyte:", ["Salinity"], index=0, key="seasonal_analyte_select"
555
  )
556
  selected_year = st.sidebar.selectbox(
557
  "Select Year:", sorted(years, reverse=True), index=0, key="seasonal_year_select"
@@ -906,3 +906,8 @@ nutrient_ratios_page = st.Page(
906
  icon=":material/scatter_plot:",
907
  )
908
  settings_page = st.Page("settings.py", title="Settings", icon=":material/settings:")
 
 
 
 
 
 
551
  # Move filters to sidebar
552
  st.sidebar.markdown("### Filter Options")
553
  analyte = st.sidebar.selectbox(
554
+ "Select Parameter:", ["Salinity"], index=0, key="seasonal_analyte_select"
555
  )
556
  selected_year = st.sidebar.selectbox(
557
  "Select Year:", sorted(years, reverse=True), index=0, key="seasonal_year_select"
 
906
  icon=":material/scatter_plot:",
907
  )
908
  settings_page = st.Page("settings.py", title="Settings", icon=":material/settings:")
909
+ dissolved_oxygen_page = st.Page(
910
+ "ui/pages/dissolved_oxygen.py",
911
+ title="Dissolved Oxygen",
912
+ icon=":material/water_drop:",
913
+ )
ui/__init__.py ADDED
File without changes
ui/pages/__init__.py ADDED
File without changes
ui/pages/dissolved_oxygen.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ from analysis import plot_do_timeseries
4
+
5
+
6
def dissolved_oxygen():
    """Render the sidebar filters and build the dissolved oxygen figure.

    Reads the raw dataframe from ``st.session_state.data["raw_df"]``, lets
    the user choose an aggregation period and a sector in the sidebar, and
    returns the figure produced by ``plot_do_timeseries`` for that selection.

    Returns:
    --------
    The figure returned by ``plot_do_timeseries``.
    """
    raw_df = st.session_state.data["raw_df"]

    # Add period selector to sidebar
    period = st.sidebar.radio(
        "Time Period", options=["Monthly", "Yearly"], key="do_period"
    )

    # Missing sectors are excluded from the choices: a NaN option can never
    # match any row (NaN != NaN) and would only produce an empty selection.
    sector_options = ["All"] + list(raw_df["Sector"].dropna().unique())
    sector = st.sidebar.selectbox("Sector", options=sector_options, key="do_sector")

    if sector == "All":
        filtered_df = raw_df
    else:
        filtered_df = raw_df[raw_df["Sector"] == sector]

    return plot_do_timeseries(filtered_df, period=period, sector=sector)
21
+
22
+
23
+ st.info(
24
+ """
25
+ This is a half-baked attempt to replicate the Hudson River Estuary Program's excellent
26
+ [Long Term Findings chart for dissolved oxygen](https://www.hudsonriver.org/ccmp/soe/water-quality/do).
27
+ The EPA threshold is copied from the reference image and not specific to the water quality standards for our area.
28
+ """
29
+ )
30
+ fig = dissolved_oxygen()
31
+ st.pyplot(fig)
utils/data_loading.py CHANGED
@@ -114,7 +114,7 @@ class DataManager:
114
  return self._get_empty_data_structure()
115
 
116
 
117
- @timer(include_params=False)
118
  def get_raw_data(file_path: str) -> pd.DataFrame:
119
  """Load raw data from parquet file"""
120
  return pd.read_parquet(file_path)
@@ -200,6 +200,7 @@ def add_lat_long(raw_df: pd.DataFrame, stations_df: pd.DataFrame) -> pd.DataFram
200
  return raw_df.drop("Number", axis=1)
201
 
202
 
 
203
  def get_stations_data() -> pd.DataFrame:
204
  """
205
  Return stations data as a dataframe with the most recent and earliest sample dates for each station.
 
114
  return self._get_empty_data_structure()
115
 
116
 
117
# NOTE(review): `timer` is defined outside this view; presumably it reports
# execution time and include_params=True adds the call arguments — confirm.
@timer(include_params=True)
def get_raw_data(file_path: str) -> pd.DataFrame:
    """Load raw data from parquet file.

    Parameters:
    -----------
    file_path : str
        Path of the parquet file, passed unchanged to ``pd.read_parquet``.

    Returns:
    --------
    pd.DataFrame
        The dataframe returned by ``pd.read_parquet``.
    """
    return pd.read_parquet(file_path)
 
200
  return raw_df.drop("Number", axis=1)
201
 
202
 
203
+ @timer(include_params=False)
204
  def get_stations_data() -> pd.DataFrame:
205
  """
206
  Return stations data as a dataframe with the most recent and earliest sample dates for each station.