"""OSC Usage Dashboard — Streamlit app with Plotly charts, organized in tabs.""" from __future__ import annotations from datetime import date, datetime, timedelta import streamlit as st from charts import ( chart_budget_gauge, chart_burn_rate, chart_daily_usage, chart_dollars_by_user, chart_duration_distribution, chart_efficiency_scatter, chart_job_outcomes, chart_launch_method_count, chart_launch_method_dollars, chart_outcome_breakdown, chart_queue_efficiency, chart_resource_sizing, chart_spend_by_outcome, chart_usage_by_system, ) from config import ALLOCATIONS, INTERACTIVE_METHODS, PROJECT_CODES from data_loader import filter_jobs, load_data st.set_page_config(page_title="OSC Usage Dashboard", layout="wide") # --- Load data --- jobs, snapshots, metadata = load_data() # --- Sidebar --- with st.sidebar: st.title("OSC Usage Dashboard") # Freshness badge pushed_at = metadata.get("pushed_at", "") if pushed_at: pushed_dt = datetime.fromisoformat(pushed_at) age = datetime.now() - pushed_dt if age > timedelta(hours=24): st.warning(f"Data is {age.days}d {age.seconds // 3600}h old") else: st.success(f"Updated {pushed_at[:16]}") else: st.info("No timestamp in metadata") if st.button("Reload Data"): st.cache_data.clear() st.rerun() st.divider() # Date range all_dates = jobs["end_date"].dropna() if not all_dates.empty: min_date = all_dates.min() max_date = all_dates.max() else: min_date = date(2024, 7, 1) max_date = date.today() date_range = st.date_input( "Date Range", value=(min_date, max_date), min_value=min_date, max_value=max_date, ) if isinstance(date_range, tuple) and len(date_range) == 2: date_filter = date_range else: date_filter = (min_date, max_date) # Project filter all_projects = sorted(jobs["project_code"].dropna().unique().tolist()) selected_projects = st.multiselect("Projects", all_projects, default=all_projects) # User filter all_users = sorted(jobs["username"].dropna().unique().tolist()) selected_users = st.multiselect("Users", all_users, default=all_users) # System filter all_systems = sorted(jobs["system_code"].dropna().unique().tolist()) selected_systems = st.multiselect("Systems", all_systems, default=all_systems) # --- Apply filters --- filtered = filter_jobs( jobs, date_range=date_filter, projects=selected_projects, users=selected_users, systems=selected_systems, ) # --- Metric cards (always visible above tabs) --- total_spend = filtered["dollars_used"].sum() total_jobs = len(filtered) # Batch completion % is_batch = ~filtered["launch_method"].isin(INTERACTIVE_METHODS) batch_jobs = filtered[is_batch] batch_total = len(batch_jobs) batch_completed = (batch_jobs["last_state"] == "COMPLETED").sum() batch_completion_pct = batch_completed / batch_total * 100 if batch_total > 0 else 0 # Interactive spend % interactive_dollars = filtered[filtered["launch_method"].isin(INTERACTIVE_METHODS)][ "dollars_used" ].sum() interactive_pct = interactive_dollars / total_spend * 100 if total_spend > 0 else 0 m1, m2, m3, m4 = st.columns(4) m1.metric("Total Spend", f"${total_spend:,.2f}", help="Sum of all job costs in the filtered period") m2.metric("Total Jobs", f"{total_jobs:,}", help="Number of jobs in the filtered period") m3.metric( "Batch Completion %", f"{batch_completion_pct:.1f}%", help="% of batch jobs that completed successfully (interactive sessions excluded — they never exit as COMPLETED)", ) m4.metric( "Interactive Spend %", f"{interactive_pct:.1f}%", help="% of total dollars spent on interactive sessions (Jupyter, Desktop, Code Server, etc.)", ) st.divider() # --- 5 tabs --- tab_overview, tab_spend, tab_health, tab_user, tab_data = st.tabs( ["Overview", "Spend Analysis", "Job Health", "User Detail", "Raw Data"] ) # === Overview tab === with tab_overview: # Budget gauges per project gauge_cols = st.columns(len(PROJECT_CODES)) for col, proj in zip(gauge_cols, PROJECT_CODES): with col: proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum() alloc = ALLOCATIONS.get(proj, 0) if alloc > 0: fig = chart_budget_gauge(proj, proj_spend, alloc) st.plotly_chart(fig, use_container_width=True) else: st.metric(proj, f"${proj_spend:,.2f}", help="No allocation data") # Per-project balance from snapshots latest_snapshots = snapshots.sort_values("snapshot_date").groupby("project_code").last() snap_cols = st.columns(len(PROJECT_CODES)) for col, proj in zip(snap_cols, PROJECT_CODES): with col: if proj in latest_snapshots.index: row = latest_snapshots.loc[proj] balance = row.get("current_balance", None) balance_str = f"${balance:,.2f}" if balance is not None else "N/A" proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum() st.metric( f"{proj} Balance", balance_str, delta=f"-${proj_spend:,.2f} spent", delta_color="inverse", ) # Burn rate (full width) fig = chart_burn_rate(filtered) if fig: st.plotly_chart(fig, use_container_width=True) # === Spend Analysis tab === with tab_spend: left, right = st.columns(2) with left: fig = chart_usage_by_system(filtered) if fig: st.plotly_chart(fig, use_container_width=True) with right: fig = chart_dollars_by_user(filtered) if fig: st.plotly_chart(fig, use_container_width=True) left2, right2 = st.columns(2) with left2: fig = chart_launch_method_dollars(filtered) if fig: st.plotly_chart(fig, use_container_width=True) with right2: fig = chart_launch_method_count(filtered) if fig: st.plotly_chart(fig, use_container_width=True) # Daily usage (full width with range slider) fig = chart_daily_usage(filtered) if fig: st.plotly_chart(fig, use_container_width=True) # === Job Health tab === with tab_health: left, right = st.columns(2) with left: fig = chart_job_outcomes(filtered) if fig: st.plotly_chart(fig, use_container_width=True) with right: fig = chart_spend_by_outcome(filtered) if fig: st.plotly_chart(fig, use_container_width=True) left2, right2 = st.columns(2) with left2: fig = chart_outcome_breakdown(filtered) if fig: st.plotly_chart(fig, use_container_width=True) with right2: fig = chart_efficiency_scatter(filtered) if fig: st.plotly_chart(fig, use_container_width=True) left3, right3 = st.columns(2) with left3: fig = chart_queue_efficiency(filtered) if fig: st.plotly_chart(fig, use_container_width=True) with right3: fig = chart_duration_distribution(filtered) if fig: st.plotly_chart(fig, use_container_width=True) # Resource sizing (full width) fig = chart_resource_sizing(filtered) if fig: st.plotly_chart(fig, use_container_width=True) # === User Detail tab === with tab_user: users_in_data = sorted(filtered["username"].dropna().unique().tolist()) if not users_in_data: st.info("No users in filtered data.") else: selected_user = st.selectbox("Select User", users_in_data) user_df = filtered[filtered["username"] == selected_user] # User metric cards u_spend = user_df["dollars_used"].sum() u_jobs = len(user_df) u_batch = user_df[~user_df["launch_method"].isin(INTERACTIVE_METHODS)] u_batch_total = len(u_batch) u_batch_completed = (u_batch["last_state"] == "COMPLETED").sum() u_batch_pct = u_batch_completed / u_batch_total * 100 if u_batch_total > 0 else 0 um1, um2, um3 = st.columns(3) um1.metric("User Spend", f"${u_spend:,.2f}") um2.metric("User Jobs", f"{u_jobs:,}") um3.metric("Batch Completion %", f"{u_batch_pct:.1f}%") # User charts left, right = st.columns(2) with left: fig = chart_outcome_breakdown(user_df) if fig: st.plotly_chart(fig, use_container_width=True) with right: fig = chart_efficiency_scatter(user_df) if fig: st.plotly_chart(fig, use_container_width=True) left2, right2 = st.columns(2) with left2: fig = chart_launch_method_dollars(user_df) if fig: st.plotly_chart(fig, use_container_width=True) with right2: fig = chart_resource_sizing(user_df) if fig: st.plotly_chart(fig, use_container_width=True) # Daily usage (full width) fig = chart_daily_usage(user_df) if fig: st.plotly_chart(fig, use_container_width=True) # Duration distribution (full width) fig = chart_duration_distribution(user_df) if fig: st.plotly_chart(fig, use_container_width=True) # User job table st.subheader(f"{selected_user}'s Jobs ({u_jobs:,} rows)") display_cols = [ c for c in [ "job_id", "project_code", "system_code", "queue_name", "launch_method", "last_state", "outcome_category", "walltime_hours", "dollars_used", "end_time", ] if c in user_df.columns ] user_display = user_df[display_cols].sort_values("end_time", ascending=False) col_config = {} if "dollars_used" in user_display.columns: col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f") if "walltime_hours" in user_display.columns: col_config["walltime_hours"] = st.column_config.NumberColumn( "Walltime (hrs)", format="%.1f" ) st.dataframe( user_display, use_container_width=True, column_config=col_config, hide_index=True ) # === Raw Data tab === with tab_data: st.subheader(f"Filtered Jobs ({len(filtered):,} rows)") # Format for display display_cols = [ c for c in [ "job_id", "username", "project_code", "system_code", "queue_name", "launch_method", "last_state", "outcome_category", "walltime_hours", "dollars_used", "submit_time", "start_time", "end_time", ] if c in filtered.columns ] display_df = filtered[display_cols].copy() # Format columns for readability col_config = {} if "dollars_used" in display_df.columns: col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f") if "walltime_hours" in display_df.columns: col_config["walltime_hours"] = st.column_config.NumberColumn( "Walltime (hrs)", format="%.1f" ) st.dataframe(display_df, use_container_width=True, column_config=col_config, hide_index=True) # Download button csv = filtered[display_cols].to_csv(index=False) st.download_button( label="Download filtered data as CSV", data=csv, file_name="osc_usage_filtered.csv", mime="text/csv", ) st.caption(f"Data from OSCusage CLI | {metadata.get('job_count', '?')} jobs in dataset")