github-actions[bot] committed on
Commit
39e6d5e
·
1 Parent(s): bb82b34

Add all files with LFS support

Browse files
analysis.py CHANGED
@@ -1254,6 +1254,138 @@ def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
1254
  return g.figure
1255
 
1256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1257
  def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
1258
  """
1259
  Create an interactive scatter plot of DO vs temperature with regression lines using Altair.
 
1254
  return g.figure
1255
 
1256
 
1257
def plotly_plot_do_temp_relationship(df: pd.DataFrame) -> go.Figure:
    """
    Create an interactive scatter plot of DO vs temperature with regression lines using Plotly.
    Matches the style and features of the original matplotlib/seaborn plot.

    One marker trace is drawn per sample position ("Surface" and "Bottom"),
    each optionally accompanied by a dashed first-degree least-squares trend
    line, plus a horizontal reference line at 5 mg/L.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The
        columns read here are "Org_Analyte_Name", "Org_Result_Value",
        "Activity_Start_Date_Time", "Station_Number", "Sample_Position"
        and "Sector".

    Returns:
    --------
    go.Figure
        Plotly figure object
    """
    analytes = ["Dissolved Oxygen", "Temperature, Water"]

    # Pivot the long-format results so each sampling event carries both
    # analytes side by side; events missing either value are dropped.
    do_temp_data = (
        df[df["Org_Analyte_Name"].isin(analytes)]
        .pivot_table(
            index=[
                "Activity_Start_Date_Time",
                "Station_Number",
                "Sample_Position",
                "Sector",  # Added for tooltip
            ],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
        .dropna(subset=analytes)
    )

    fig = go.Figure()

    # One colour per sample position, shared by its markers and trend line
    colors = {"Surface": "#8da0cb", "Bottom": "#fc8d62"}

    for position, color in colors.items():
        pos_data = do_temp_data[do_temp_data["Sample_Position"] == position]
        temperature = pos_data["Temperature, Water"]
        dissolved_oxygen = pos_data["Dissolved Oxygen"]

        # Scatter trace; customdata feeds the station/sector hover fields
        fig.add_trace(
            go.Scatter(
                x=temperature,
                y=dissolved_oxygen,
                mode="markers",
                name=position,
                marker=dict(color=color, size=8, opacity=0.6),
                hovertemplate=(
                    "Temperature: %{x:.1f}°C<br>"
                    "DO: %{y:.1f} mg/L<br>"
                    f"Position: {position}<br>"
                    "Station: %{customdata[0]}<br>"
                    "Sector: %{customdata[1]}<br>"
                    "<extra></extra>"
                ),
                customdata=pos_data[["Station_Number", "Sector"]],
            )
        )

        # A straight-line fit needs at least two distinct x values.
        # np.polyfit raises on empty input and is rank-deficient when every
        # temperature is identical, so skip the trend line in those cases
        # instead of failing the whole plot.
        if temperature.nunique() < 2:
            continue

        trend = np.poly1d(np.polyfit(temperature, dissolved_oxygen, 1))
        x_range = np.linspace(temperature.min(), temperature.max(), 100)

        fig.add_trace(
            go.Scatter(
                x=x_range,
                y=trend(x_range),
                mode="lines",
                line=dict(color=color, dash="dash"),
                name=f"{position} Trend",
                hovertemplate=None,
                hoverinfo="skip",
                showlegend=False,
            )
        )

    # Add DO threshold line
    fig.add_hline(
        y=5,
        line=dict(color="red", width=1, dash="dot"),
        opacity=0.5,
        annotation_text="5 mg/L DO threshold",
        annotation_position="left",
        annotation=dict(
            font=dict(color="red", size=12),
            xanchor="left",
            yanchor="bottom",
            opacity=0.5,
        ),
    )

    # Update layout
    fig.update_layout(
        title=dict(
            text="Dissolved Oxygen vs Water Temperature",
            x=0.5,
            y=0.95,
            xanchor="center",
            yanchor="top",
            font=dict(size=16),
        ),
        xaxis_title="Water Temperature (°C)",
        yaxis_title="Dissolved Oxygen (mg/L)",
        legend_title="Sample Position",
        legend=dict(
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.05,
        ),
        template="plotly_white",
        width=800,
        height=600,
        showlegend=True,
    )

    # Update axes
    grid_style = dict(showgrid=True, gridwidth=1, gridcolor="rgba(128, 128, 128, 0.2)")
    fig.update_xaxes(**grid_style)
    fig.update_yaxes(**grid_style)

    return fig
1387
+
1388
+
1389
  def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
1390
  """
1391
  Create an interactive scatter plot of DO vs temperature with regression lines using Altair.
data/master_data_file_2010-01-01_-_2024-10-31.parquet ADDED
Binary file (493 kB). View file
 
main.py CHANGED
@@ -1,13 +1,11 @@
1
  import pandas as pd
2
 
3
 
4
- def get_raw_data(file_path: str):
5
  """
6
- Read raw data from a CSV or Parquet file.
7
  """
8
- if file_path.endswith(".parquet"):
9
- return pd.read_parquet(file_path)
10
-
11
  categorical_columns = [
12
  "Monitoring_Location_ID",
13
  "Activity_Depth_Unit",
@@ -27,11 +25,15 @@ def get_raw_data(file_path: str):
27
  **{col: "category" for col in categorical_columns},
28
  }
29
 
30
- return pd.read_csv(file_path, dtype=dtype_dict).assign(
31
- Org_Result_Value=lambda df: pd.to_numeric(
32
- df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
33
- ),
34
- Activity_Start_Date_Time=lambda df: pd.to_datetime(
35
- df["Activity_Start_Date_Time"]
36
- ),
 
 
 
 
37
  )
 
1
  import pandas as pd
2
 
3
 
4
+ def master_data_csv_to_parquet(file_path: str):
5
  """
6
+ Convert master data export from a CSV to a Parquet file.
7
  """
8
+ save_path = file_path.replace(".csv", ".parquet")
 
 
9
  categorical_columns = [
10
  "Monitoring_Location_ID",
11
  "Activity_Depth_Unit",
 
25
  **{col: "category" for col in categorical_columns},
26
  }
27
 
28
+ return (
29
+ pd.read_csv(file_path, dtype=dtype_dict)
30
+ .assign(
31
+ Org_Result_Value=lambda df: pd.to_numeric(
32
+ df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
33
+ ),
34
+ Activity_Start_Date_Time=lambda df: pd.to_datetime(
35
+ df["Activity_Start_Date_Time"]
36
+ ),
37
+ )
38
+ .to_parquet(save_path)
39
  )
pages.py CHANGED
@@ -7,7 +7,6 @@ import pandas as pd
7
  import streamlit as st
8
 
9
  from analysis import (
10
- altair_plot_do_temp_relationship,
11
  altair_plot_np_ratios,
12
  altair_plot_sector_trends,
13
  generate_seasonal_plot,
@@ -16,6 +15,7 @@ from analysis import (
16
  plot_np_ratios,
17
  plot_sector_trends,
18
  plot_trends_by_station,
 
19
  )
20
  from components import (
21
  get_reporting_year_info_message,
@@ -187,18 +187,24 @@ def home_section():
187
  stat_col1, stat_col2 = st.columns(2)
188
 
189
  with stat_col1:
190
- st.metric("Active Stations", len(stations_df))
 
 
 
 
 
191
  st.metric("Sectors", len(stations_df["Sector"].unique()))
192
- st.metric("Waterbody IDs", len(stations_df["WBID"].unique()))
193
 
194
  with stat_col2:
195
- st.metric("Total Samples", f"{int(stations_df['Total_Samples'].sum()):,}")
 
196
 
197
  with map_col1:
198
  render_stations_map(stations_df)
199
 
200
  # Add stations table
201
- st.markdown("### Stations Details")
202
 
203
  # Create a simplified view of the stations data
204
  display_columns = [
@@ -289,12 +295,15 @@ def home_section():
289
  csv_buffer = io.StringIO()
290
  stations_table.to_csv(csv_buffer, index=False)
291
  st.download_button(
292
- label="Download Stations Data (CSV)",
293
  data=csv_buffer.getvalue(),
294
  file_name="monitoring_stations.csv",
295
  mime="text/csv",
296
  )
297
 
 
 
 
298
  # Summary Section
299
  st.markdown("## Data Summary")
300
  tab1, tab2 = st.tabs(["Overall Summary", "Summary by Station"])
@@ -347,8 +356,8 @@ def do_temp_relationship_section():
347
  "This plot shows the relationship between dissolved oxygen and water temperature for all data."
348
  )
349
  if st.session_state.ENABLE_ALTAIR:
350
- fig = altair_plot_do_temp_relationship(st.session_state.data["raw_df"])
351
- st.altair_chart(fig, use_container_width=True) # type: ignore
352
  else:
353
  fig = plot_do_temp_relationship(st.session_state.data["raw_df"])
354
  st.pyplot(fig)
 
7
  import streamlit as st
8
 
9
  from analysis import (
 
10
  altair_plot_np_ratios,
11
  altair_plot_sector_trends,
12
  generate_seasonal_plot,
 
15
  plot_np_ratios,
16
  plot_sector_trends,
17
  plot_trends_by_station,
18
+ plotly_plot_do_temp_relationship,
19
  )
20
  from components import (
21
  get_reporting_year_info_message,
 
187
  stat_col1, stat_col2 = st.columns(2)
188
 
189
  with stat_col1:
190
+ # Calculate active stations (sampled within last 12 months)
191
+ today = pd.Timestamp.today()
192
+ active_mask = pd.to_datetime(stations_df["Most_Recent_Sample"]) > (
193
+ today - pd.DateOffset(months=12)
194
+ )
195
+ st.metric("Active Stations", active_mask.sum())
196
  st.metric("Sectors", len(stations_df["Sector"].unique()))
197
+ st.metric("Total Samples", f"{int(stations_df['Total_Samples'].sum()):,}")
198
 
199
  with stat_col2:
200
+ st.metric("Total Stations", len(stations_df))
201
+ st.metric("Waterbody IDs", len(stations_df["WBID"].unique()))
202
 
203
  with map_col1:
204
  render_stations_map(stations_df)
205
 
206
  # Add stations table
207
+ st.markdown("### Station Details")
208
 
209
  # Create a simplified view of the stations data
210
  display_columns = [
 
295
  csv_buffer = io.StringIO()
296
  stations_table.to_csv(csv_buffer, index=False)
297
  st.download_button(
298
+ label="Download Station Details",
299
  data=csv_buffer.getvalue(),
300
  file_name="monitoring_stations.csv",
301
  mime="text/csv",
302
  )
303
 
304
+
305
+ @log_page_visit()
306
+ def data_summary_section():
307
  # Summary Section
308
  st.markdown("## Data Summary")
309
  tab1, tab2 = st.tabs(["Overall Summary", "Summary by Station"])
 
356
  "This plot shows the relationship between dissolved oxygen and water temperature for all data."
357
  )
358
  if st.session_state.ENABLE_ALTAIR:
359
+ fig = plotly_plot_do_temp_relationship(st.session_state.data["raw_df"])
360
+ st.plotly_chart(fig, use_container_width=True) # type: ignore
361
  else:
362
  fig = plot_do_temp_relationship(st.session_state.data["raw_df"])
363
  st.pyplot(fig)
utils/data_loading.py CHANGED
@@ -123,7 +123,7 @@ def load_data(
123
  end_date: Optional end date filter
124
  reporting_month: Optional reporting month filter
125
  """
126
- raw_df = get_raw_data("data/master_data_file_2019-01-01_-_2024-10-31.parquet")
127
 
128
  # Get full dataset date range for the date input controls
129
  full_dataset_metadata = get_dataset_metadata(raw_df)
 
123
  end_date: Optional end date filter
124
  reporting_month: Optional reporting month filter
125
  """
126
+ raw_df = get_raw_data("data/master_data_file_2010-01-01_-_2024-10-31.parquet")
127
 
128
  # Get full dataset date range for the date input controls
129
  full_dataset_metadata = get_dataset_metadata(raw_df)