github-actions[bot] committed on
Commit ·
39e6d5e
1
Parent(s): bb82b34
Add all files with LFS support
Browse files
- analysis.py +132 -0
- data/master_data_file_2010-01-01_-_2024-10-31.parquet +0 -0
- main.py +14 -12
- pages.py +17 -8
- utils/data_loading.py +1 -1
analysis.py
CHANGED
|
@@ -1254,6 +1254,138 @@ def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
|
|
| 1254 |
return g.figure
|
| 1255 |
|
| 1256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1257 |
def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
|
| 1258 |
"""
|
| 1259 |
Create an interactive scatter plot of DO vs temperature with regression lines using Altair.
|
|
|
|
| 1254 |
return g.figure
|
| 1255 |
|
| 1256 |
|
| 1257 |
+
def plotly_plot_do_temp_relationship(df: pd.DataFrame) -> go.Figure:
|
| 1258 |
+
"""
|
| 1259 |
+
Create an interactive scatter plot of DO vs temperature with regression lines using Plotly.
|
| 1260 |
+
Matches the style and features of the original matplotlib/seaborn plot.
|
| 1261 |
+
|
| 1262 |
+
Parameters:
|
| 1263 |
+
-----------
|
| 1264 |
+
df : pd.DataFrame
|
| 1265 |
+
Input dataframe containing DO and temperature measurements
|
| 1266 |
+
|
| 1267 |
+
Returns:
|
| 1268 |
+
--------
|
| 1269 |
+
go.Figure
|
| 1270 |
+
Plotly figure object
|
| 1271 |
+
"""
|
| 1272 |
+
# Prepare the data similarly to the original function
|
| 1273 |
+
do_temp_data = (
|
| 1274 |
+
df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
|
| 1275 |
+
.pivot_table(
|
| 1276 |
+
index=[
|
| 1277 |
+
"Activity_Start_Date_Time",
|
| 1278 |
+
"Station_Number",
|
| 1279 |
+
"Sample_Position",
|
| 1280 |
+
"Sector", # Added for tooltip
|
| 1281 |
+
],
|
| 1282 |
+
columns="Org_Analyte_Name",
|
| 1283 |
+
values="Org_Result_Value",
|
| 1284 |
+
observed=True,
|
| 1285 |
+
)
|
| 1286 |
+
.reset_index()
|
| 1287 |
+
.dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
|
| 1288 |
+
)
|
| 1289 |
+
|
| 1290 |
+
# Create figure
|
| 1291 |
+
fig = go.Figure()
|
| 1292 |
+
|
| 1293 |
+
# Colors matching seaborn's muted palette
|
| 1294 |
+
colors = {"Surface": "#8da0cb", "Bottom": "#fc8d62"}
|
| 1295 |
+
|
| 1296 |
+
# Add scatter plots and regression lines for each position
|
| 1297 |
+
for position in ["Surface", "Bottom"]:
|
| 1298 |
+
pos_data = do_temp_data[do_temp_data["Sample_Position"] == position]
|
| 1299 |
+
|
| 1300 |
+
# Add scatter plot
|
| 1301 |
+
fig.add_trace(
|
| 1302 |
+
go.Scatter(
|
| 1303 |
+
x=pos_data["Temperature, Water"],
|
| 1304 |
+
y=pos_data["Dissolved Oxygen"],
|
| 1305 |
+
mode="markers",
|
| 1306 |
+
name=position,
|
| 1307 |
+
marker=dict(color=colors[position], size=8, opacity=0.6),
|
| 1308 |
+
hovertemplate=(
|
| 1309 |
+
"Temperature: %{x:.1f}°C<br>"
|
| 1310 |
+
"DO: %{y:.1f} mg/L<br>"
|
| 1311 |
+
"Position: " + position + "<br>"
|
| 1312 |
+
"Station: %{customdata[0]}<br>"
|
| 1313 |
+
"Sector: %{customdata[1]}<br>"
|
| 1314 |
+
"<extra></extra>"
|
| 1315 |
+
),
|
| 1316 |
+
customdata=pos_data[["Station_Number", "Sector"]],
|
| 1317 |
+
)
|
| 1318 |
+
)
|
| 1319 |
+
|
| 1320 |
+
# Calculate and add regression line
|
| 1321 |
+
z = np.polyfit(pos_data["Temperature, Water"], pos_data["Dissolved Oxygen"], 1)
|
| 1322 |
+
p = np.poly1d(z)
|
| 1323 |
+
x_range = np.linspace(
|
| 1324 |
+
pos_data["Temperature, Water"].min(),
|
| 1325 |
+
pos_data["Temperature, Water"].max(),
|
| 1326 |
+
100,
|
| 1327 |
+
)
|
| 1328 |
+
|
| 1329 |
+
fig.add_trace(
|
| 1330 |
+
go.Scatter(
|
| 1331 |
+
x=x_range,
|
| 1332 |
+
y=p(x_range),
|
| 1333 |
+
mode="lines",
|
| 1334 |
+
line=dict(color=colors[position], dash="dash"),
|
| 1335 |
+
name=f"{position} Trend",
|
| 1336 |
+
hovertemplate=None,
|
| 1337 |
+
hoverinfo="skip",
|
| 1338 |
+
showlegend=False,
|
| 1339 |
+
)
|
| 1340 |
+
)
|
| 1341 |
+
|
| 1342 |
+
# Add DO threshold line
|
| 1343 |
+
fig.add_hline(
|
| 1344 |
+
y=5,
|
| 1345 |
+
line=dict(color="red", width=1, dash="dot"),
|
| 1346 |
+
opacity=0.5,
|
| 1347 |
+
annotation_text="5 mg/L DO threshold",
|
| 1348 |
+
annotation_position="left",
|
| 1349 |
+
annotation=dict(
|
| 1350 |
+
font=dict(color="red", size=12),
|
| 1351 |
+
xanchor="left",
|
| 1352 |
+
yanchor="bottom",
|
| 1353 |
+
opacity=0.5,
|
| 1354 |
+
),
|
| 1355 |
+
)
|
| 1356 |
+
|
| 1357 |
+
# Update layout
|
| 1358 |
+
fig.update_layout(
|
| 1359 |
+
title=dict(
|
| 1360 |
+
text="Dissolved Oxygen vs Water Temperature",
|
| 1361 |
+
x=0.5,
|
| 1362 |
+
y=0.95,
|
| 1363 |
+
xanchor="center",
|
| 1364 |
+
yanchor="top",
|
| 1365 |
+
font=dict(size=16),
|
| 1366 |
+
),
|
| 1367 |
+
xaxis_title="Water Temperature (°C)",
|
| 1368 |
+
yaxis_title="Dissolved Oxygen (mg/L)",
|
| 1369 |
+
legend_title="Sample Position",
|
| 1370 |
+
legend=dict(
|
| 1371 |
+
yanchor="top",
|
| 1372 |
+
y=1,
|
| 1373 |
+
xanchor="left",
|
| 1374 |
+
x=1.05,
|
| 1375 |
+
),
|
| 1376 |
+
template="plotly_white",
|
| 1377 |
+
width=800,
|
| 1378 |
+
height=600,
|
| 1379 |
+
showlegend=True,
|
| 1380 |
+
)
|
| 1381 |
+
|
| 1382 |
+
# Update axes
|
| 1383 |
+
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
|
| 1384 |
+
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
|
| 1385 |
+
|
| 1386 |
+
return fig
|
| 1387 |
+
|
| 1388 |
+
|
| 1389 |
def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
|
| 1390 |
"""
|
| 1391 |
Create an interactive scatter plot of DO vs temperature with regression lines using Altair.
|
data/master_data_file_2010-01-01_-_2024-10-31.parquet
ADDED
|
Binary file (493 kB). View file
|
|
|
main.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
|
| 4 |
-
def
|
| 5 |
"""
|
| 6 |
-
|
| 7 |
"""
|
| 8 |
-
|
| 9 |
-
return pd.read_parquet(file_path)
|
| 10 |
-
|
| 11 |
categorical_columns = [
|
| 12 |
"Monitoring_Location_ID",
|
| 13 |
"Activity_Depth_Unit",
|
|
@@ -27,11 +25,15 @@ def get_raw_data(file_path: str):
|
|
| 27 |
**{col: "category" for col in categorical_columns},
|
| 28 |
}
|
| 29 |
|
| 30 |
-
return
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
|
| 4 |
+
def master_data_csv_to_parquet(file_path: str):
|
| 5 |
"""
|
| 6 |
+
Convert master data export from a CSV to a Parquet file.
|
| 7 |
"""
|
| 8 |
+
save_path = file_path.replace(".csv", ".parquet")
|
|
|
|
|
|
|
| 9 |
categorical_columns = [
|
| 10 |
"Monitoring_Location_ID",
|
| 11 |
"Activity_Depth_Unit",
|
|
|
|
| 25 |
**{col: "category" for col in categorical_columns},
|
| 26 |
}
|
| 27 |
|
| 28 |
+
return (
|
| 29 |
+
pd.read_csv(file_path, dtype=dtype_dict)
|
| 30 |
+
.assign(
|
| 31 |
+
Org_Result_Value=lambda df: pd.to_numeric(
|
| 32 |
+
df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
|
| 33 |
+
),
|
| 34 |
+
Activity_Start_Date_Time=lambda df: pd.to_datetime(
|
| 35 |
+
df["Activity_Start_Date_Time"]
|
| 36 |
+
),
|
| 37 |
+
)
|
| 38 |
+
.to_parquet(save_path)
|
| 39 |
)
|
pages.py
CHANGED
|
@@ -7,7 +7,6 @@ import pandas as pd
|
|
| 7 |
import streamlit as st
|
| 8 |
|
| 9 |
from analysis import (
|
| 10 |
-
altair_plot_do_temp_relationship,
|
| 11 |
altair_plot_np_ratios,
|
| 12 |
altair_plot_sector_trends,
|
| 13 |
generate_seasonal_plot,
|
|
@@ -16,6 +15,7 @@ from analysis import (
|
|
| 16 |
plot_np_ratios,
|
| 17 |
plot_sector_trends,
|
| 18 |
plot_trends_by_station,
|
|
|
|
| 19 |
)
|
| 20 |
from components import (
|
| 21 |
get_reporting_year_info_message,
|
|
@@ -187,18 +187,24 @@ def home_section():
|
|
| 187 |
stat_col1, stat_col2 = st.columns(2)
|
| 188 |
|
| 189 |
with stat_col1:
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
st.metric("Sectors", len(stations_df["Sector"].unique()))
|
| 192 |
-
st.metric("
|
| 193 |
|
| 194 |
with stat_col2:
|
| 195 |
-
st.metric("Total
|
|
|
|
| 196 |
|
| 197 |
with map_col1:
|
| 198 |
render_stations_map(stations_df)
|
| 199 |
|
| 200 |
# Add stations table
|
| 201 |
-
st.markdown("###
|
| 202 |
|
| 203 |
# Create a simplified view of the stations data
|
| 204 |
display_columns = [
|
|
@@ -289,12 +295,15 @@ def home_section():
|
|
| 289 |
csv_buffer = io.StringIO()
|
| 290 |
stations_table.to_csv(csv_buffer, index=False)
|
| 291 |
st.download_button(
|
| 292 |
-
label="Download
|
| 293 |
data=csv_buffer.getvalue(),
|
| 294 |
file_name="monitoring_stations.csv",
|
| 295 |
mime="text/csv",
|
| 296 |
)
|
| 297 |
|
|
|
|
|
|
|
|
|
|
| 298 |
# Summary Section
|
| 299 |
st.markdown("## Data Summary")
|
| 300 |
tab1, tab2 = st.tabs(["Overall Summary", "Summary by Station"])
|
|
@@ -347,8 +356,8 @@ def do_temp_relationship_section():
|
|
| 347 |
"This plot shows the relationship between dissolved oxygen and water temperature for all data."
|
| 348 |
)
|
| 349 |
if st.session_state.ENABLE_ALTAIR:
|
| 350 |
-
fig =
|
| 351 |
-
st.
|
| 352 |
else:
|
| 353 |
fig = plot_do_temp_relationship(st.session_state.data["raw_df"])
|
| 354 |
st.pyplot(fig)
|
|
|
|
| 7 |
import streamlit as st
|
| 8 |
|
| 9 |
from analysis import (
|
|
|
|
| 10 |
altair_plot_np_ratios,
|
| 11 |
altair_plot_sector_trends,
|
| 12 |
generate_seasonal_plot,
|
|
|
|
| 15 |
plot_np_ratios,
|
| 16 |
plot_sector_trends,
|
| 17 |
plot_trends_by_station,
|
| 18 |
+
plotly_plot_do_temp_relationship,
|
| 19 |
)
|
| 20 |
from components import (
|
| 21 |
get_reporting_year_info_message,
|
|
|
|
| 187 |
stat_col1, stat_col2 = st.columns(2)
|
| 188 |
|
| 189 |
with stat_col1:
|
| 190 |
+
# Calculate active stations (sampled within last 12 months)
|
| 191 |
+
today = pd.Timestamp.today()
|
| 192 |
+
active_mask = pd.to_datetime(stations_df["Most_Recent_Sample"]) > (
|
| 193 |
+
today - pd.DateOffset(months=12)
|
| 194 |
+
)
|
| 195 |
+
st.metric("Active Stations", active_mask.sum())
|
| 196 |
st.metric("Sectors", len(stations_df["Sector"].unique()))
|
| 197 |
+
st.metric("Total Samples", f"{int(stations_df['Total_Samples'].sum()):,}")
|
| 198 |
|
| 199 |
with stat_col2:
|
| 200 |
+
st.metric("Total Stations", len(stations_df))
|
| 201 |
+
st.metric("Waterbody IDs", len(stations_df["WBID"].unique()))
|
| 202 |
|
| 203 |
with map_col1:
|
| 204 |
render_stations_map(stations_df)
|
| 205 |
|
| 206 |
# Add stations table
|
| 207 |
+
st.markdown("### Station Details")
|
| 208 |
|
| 209 |
# Create a simplified view of the stations data
|
| 210 |
display_columns = [
|
|
|
|
| 295 |
csv_buffer = io.StringIO()
|
| 296 |
stations_table.to_csv(csv_buffer, index=False)
|
| 297 |
st.download_button(
|
| 298 |
+
label="Download Station Details",
|
| 299 |
data=csv_buffer.getvalue(),
|
| 300 |
file_name="monitoring_stations.csv",
|
| 301 |
mime="text/csv",
|
| 302 |
)
|
| 303 |
|
| 304 |
+
|
| 305 |
+
@log_page_visit()
|
| 306 |
+
def data_summary_section():
|
| 307 |
# Summary Section
|
| 308 |
st.markdown("## Data Summary")
|
| 309 |
tab1, tab2 = st.tabs(["Overall Summary", "Summary by Station"])
|
|
|
|
| 356 |
"This plot shows the relationship between dissolved oxygen and water temperature for all data."
|
| 357 |
)
|
| 358 |
if st.session_state.ENABLE_ALTAIR:
|
| 359 |
+
fig = plotly_plot_do_temp_relationship(st.session_state.data["raw_df"])
|
| 360 |
+
st.plotly_chart(fig, use_container_width=True) # type: ignore
|
| 361 |
else:
|
| 362 |
fig = plot_do_temp_relationship(st.session_state.data["raw_df"])
|
| 363 |
st.pyplot(fig)
|
utils/data_loading.py
CHANGED
|
@@ -123,7 +123,7 @@ def load_data(
|
|
| 123 |
end_date: Optional end date filter
|
| 124 |
reporting_month: Optional reporting month filter
|
| 125 |
"""
|
| 126 |
-
raw_df = get_raw_data("data/
|
| 127 |
|
| 128 |
# Get full dataset date range for the date input controls
|
| 129 |
full_dataset_metadata = get_dataset_metadata(raw_df)
|
|
|
|
| 123 |
end_date: Optional end date filter
|
| 124 |
reporting_month: Optional reporting month filter
|
| 125 |
"""
|
| 126 |
+
raw_df = get_raw_data("data/master_data_file_2010-01-01_-_2024-10-31.parquet")
|
| 127 |
|
| 128 |
# Get full dataset date range for the date input controls
|
| 129 |
full_dataset_metadata = get_dataset_metadata(raw_df)
|