# waterdb / dashboard_analytics.py
# github-actions[bot]
# Add all files with LFS support
# d930228
import json
import textwrap
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
import pandas as pd
import streamlit as st
def log_visit(current_section: Optional[str] = None) -> None:
    """Append one page-view record to ``analytics.json`` and update its counters.

    Nothing is logged when the session has ``admin_authenticated`` set.
    Each record stores the timestamp, date, user agent, visitor id, section,
    visit type and query parameters. The visit type is ``"initial"`` for the
    first logged visit of a session, ``"rerun"`` when the section equals the
    previously logged one, and ``"section_change"`` otherwise.

    Args:
        current_section: Section being viewed. When omitted (or empty) the
            ``current_section`` session value is used, falling back to
            ``"Overall Summary"``.

    Raises:
        AttributeError: If ``visitor_id`` has not been set in session state.
        OSError: If the analytics file cannot be read, backed up or written.
    """
    if st.session_state.get("admin_authenticated", False):
        return

    log_file = Path("analytics.json")
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")

    # Header lookup is best effort: any failure just yields "Unknown".
    try:
        user_agent = st.context.headers.get("User-Agent", "Unknown")
    except Exception:
        user_agent = "Unknown"

    actual_section = current_section or st.session_state.get(
        "current_section", "Overall Summary"
    )

    # Classify the visit relative to what this session logged before.
    visit_type = "initial"
    if st.session_state.get("logged_visit"):
        if actual_section == st.session_state.get("last_logged_section"):
            visit_type = "rerun"
        else:
            visit_type = "section_change"

    # Remember the section so the next call can detect a change.
    st.session_state["last_logged_section"] = actual_section

    visitor_id = st.session_state.visitor_id
    visit_data = {
        "timestamp": now.isoformat(),
        "date": today,
        "user_agent": user_agent,
        "visitor_id": visitor_id,
        "page_section": actual_section,
        "visit_type": visit_type,
        "query_params": dict(st.query_params),
    }

    data: Dict[str, Any] = {}
    if log_file.exists():
        loaded = None
        try:
            with open(log_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            loaded = None

        if isinstance(loaded, dict):
            data = loaded
        elif log_file.exists():
            # Unparseable, or valid JSON whose root is not an object: keep a
            # copy for inspection and start fresh. ``replace`` (unlike
            # ``rename``) also succeeds when an older backup already exists.
            log_file.replace(log_file.with_suffix(".json.bak"))

    # Fill in any top-level key an older or partial file may be missing.
    data.setdefault("visits", [])
    data.setdefault("daily_counts", {})
    data.setdefault("section_counts", {})
    data.setdefault("daily_visitors", {})

    # The daily count is the number of distinct visitor ids seen that day.
    todays_visitors = data["daily_visitors"].setdefault(today, [])
    if visitor_id not in todays_visitors:
        todays_visitors.append(visitor_id)
    data["daily_counts"][today] = len(todays_visitors)

    data["visits"].append(visit_data)
    data["section_counts"][actual_section] = (
        data["section_counts"].get(actual_section, 0) + 1
    )

    with open(log_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)

    st.session_state["logged_visit"] = True
def get_analytics_data() -> Dict[str, Any]:
    """Load the analytics log from ``analytics.json`` in the working directory.

    Returns:
        The parsed JSON object, or an empty dict when the file does not
        exist, cannot be decoded, or does not contain a JSON object at its
        root. Callers must therefore treat every key as optional.
    """
    log_file = Path("analytics.json")
    if not log_file.exists():
        return {}

    try:
        with open(log_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A corrupt or half-written file is reported as "no data" rather
        # than crashing the page that displays it.
        return {}

    return data if isinstance(data, dict) else {}
def _format_timing_parameters(params: Any) -> str:
    """Render a logged ``parameters`` value as text wrapped at 50 columns."""
    if not isinstance(params, dict):
        return str(params)
    listing = "\n".join(f"{k}: {v}" for k, v in params.items())
    return "\n".join(
        textwrap.wrap(
            listing,
            width=50,
            break_long_words=False,
            replace_whitespace=False,
        )
    )


def render_timing_stats() -> None:
    """Render performance timing statistics.

    Renders nothing unless both ``ENABLE_TIMING`` and ``admin_authenticated``
    are truthy in session state. Otherwise shows a per-function summary
    (min / max / mean duration and call count) built from the
    ``timing_stats`` session value and, when ``timing_logs`` is non-empty,
    a table of the individual logged calls, newest first.
    """
    state = st.session_state
    # ``.get`` keeps a session that never set these flags from raising.
    if not state.get("ENABLE_TIMING", False) or not state.get(
        "admin_authenticated", False
    ):
        return

    st.markdown("---")
    st.subheader("⚡ Performance Metrics")

    # Functions with no recorded durations are skipped: min()/max() and the
    # mean are undefined for an empty list.
    timing_summary = [
        {
            "Function": func_name,
            "Min (seconds)": min(durations),
            "Max (seconds)": max(durations),
            "Mean (seconds)": sum(durations) / len(durations),
            "Calls": len(durations),
        }
        for func_name, durations in (state.get("timing_stats") or {}).items()
        if durations
    ]
    if not timing_summary:
        st.info("No timing statistics available yet. Try refreshing the page.")
        return

    st.markdown("#### Summary Statistics")
    timing_df = pd.DataFrame(timing_summary).sort_values(
        "Mean (seconds)", ascending=False
    )
    st.dataframe(
        timing_df.style.format(
            {
                "Min (seconds)": "{:.2f}",
                "Max (seconds)": "{:.2f}",
                "Mean (seconds)": "{:.2f}",
                "Calls": "{:,.0f}",
            }
        ),
        use_container_width=True,
        hide_index=True,
    )

    timing_logs = state.get("timing_logs")
    if not timing_logs:
        return

    st.markdown("#### Detailed Function Calls")
    logs_df = pd.DataFrame(timing_logs)
    logs_df["timestamp"] = pd.to_datetime(logs_df["timestamp"])
    if "parameters" in logs_df.columns:
        logs_df["parameters"] = logs_df["parameters"].apply(
            _format_timing_parameters
        )
    logs_df = logs_df.sort_values("timestamp", ascending=False)

    st.dataframe(
        logs_df.style.format(
            {
                "duration": "{:.2f} seconds",
                # Trim microseconds down to milliseconds.
                "timestamp": lambda x: x.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3],  # type: ignore
            }
        ),
        use_container_width=True,
        height=400,
        column_config={
            "parameters": st.column_config.TextColumn(
                "parameters",
                width="large",
                help="Function parameters and their values",
            )
        },
        hide_index=True,
    )
def render_analytics_page() -> None:
    """Render the analytics page: visitor metrics, section views and raw visits.

    Data comes from ``get_analytics_data()``. When no visits have been
    recorded (missing file, empty log, or missing ``visits`` key) an
    informational message replaces the tables and chart. Timing statistics
    are rendered at the bottom in either case.
    """
    st.title("Analytics")

    analytics_data = get_analytics_data()
    visits = analytics_data.get("visits") or []
    section_counts = analytics_data.get("section_counts") or {}

    if not visits:
        # An empty frame has no "timestamp"/"date" columns to work with.
        st.info("No visits have been recorded yet.")
        render_timing_stats()
        return

    visits_df = pd.DataFrame(visits)
    visits_df["timestamp"] = pd.to_datetime(visits_df["timestamp"])

    col1, col2 = st.columns(2)

    with col1:
        daily_visits_df = (
            visits_df.groupby("date")["visitor_id"]
            .agg(["nunique", "count"])
            .reset_index()
            .rename(columns={"nunique": "Unique Visitors", "count": "Total Views"})
        )
        daily_visits_df["date"] = pd.to_datetime(daily_visits_df["date"])
        daily_visits_df = daily_visits_df.sort_values("date")

        total_unique_visitors = visits_df["visitor_id"].nunique()
        total_views = len(visits_df)
        # nunique() ignores missing ids, so it can be 0 even with visits.
        avg_views_per_visitor = (
            total_views / total_unique_visitors if total_unique_visitors else 0.0
        )

        st.subheader("Visitor Metrics")
        metrics_col1, metrics_col2, metrics_col3 = st.columns(3)
        metrics_col1.metric("Total Unique Visitors", total_unique_visitors)
        metrics_col2.metric("Total Page Views", total_views)
        metrics_col3.metric("Avg Views per Visitor", f"{avg_views_per_visitor:.1f}")

        st.subheader("Daily Statistics")
        st.dataframe(
            daily_visits_df.style.format(
                {
                    "date": "{:%Y-%m-%d}",
                    "Unique Visitors": "{:,.0f}",
                    "Total Views": "{:,.0f}",
                }
            ),
            hide_index=True,
        )

    with col2:
        section_visits_df = pd.DataFrame(
            {
                "Section": list(section_counts),
                "Views": list(section_counts.values()),
            }
        )
        section_visits_df = section_visits_df.sort_values("Views", ascending=True)
        st.subheader("Total Section Views")
        st.bar_chart(section_visits_df.set_index("Section"))

    st.markdown("#### Raw Visit Data")
    cols = [
        "timestamp",
        "page_section",
        "visit_type",
        "query_params",
        "user_agent",
        "visitor_id",
    ]
    # reindex keeps the table renderable even if no stored record carries
    # one of the expected fields; such a column shows up empty.
    st.dataframe(
        visits_df.reindex(columns=cols).sort_values("timestamp", ascending=False),
        use_container_width=True,
        height=600,
        hide_index=True,
    )

    render_timing_stats()