Spaces:
Running
Running
| """OSC Usage Dashboard — Streamlit app with Plotly charts, organized in tabs.""" | |
| from __future__ import annotations | |
| from datetime import date, datetime, timedelta | |
| import streamlit as st | |
| from charts import ( | |
| chart_budget_gauge, | |
| chart_burn_rate, | |
| chart_daily_usage, | |
| chart_dollars_by_user, | |
| chart_duration_distribution, | |
| chart_efficiency_scatter, | |
| chart_job_outcomes, | |
| chart_launch_method_count, | |
| chart_launch_method_dollars, | |
| chart_outcome_breakdown, | |
| chart_queue_efficiency, | |
| chart_resource_sizing, | |
| chart_spend_by_outcome, | |
| chart_usage_by_system, | |
| ) | |
| from config import ALLOCATIONS, INTERACTIVE_METHODS, PROJECT_CODES | |
| from data_loader import filter_jobs, load_data | |
| st.set_page_config(page_title="OSC Usage Dashboard", layout="wide") | |
| # --- Load data --- | |
| jobs, snapshots, metadata = load_data() | |
| # --- Sidebar --- | |
| with st.sidebar: | |
| st.title("OSC Usage Dashboard") | |
| # Freshness badge | |
| pushed_at = metadata.get("pushed_at", "") | |
| if pushed_at: | |
| pushed_dt = datetime.fromisoformat(pushed_at) | |
| age = datetime.now() - pushed_dt | |
| if age > timedelta(hours=24): | |
| st.warning(f"Data is {age.days}d {age.seconds // 3600}h old") | |
| else: | |
| st.success(f"Updated {pushed_at[:16]}") | |
| else: | |
| st.info("No timestamp in metadata") | |
| if st.button("Reload Data"): | |
| st.cache_data.clear() | |
| st.rerun() | |
| st.divider() | |
| # Date range | |
| all_dates = jobs["end_date"].dropna() | |
| if not all_dates.empty: | |
| min_date = all_dates.min() | |
| max_date = all_dates.max() | |
| else: | |
| min_date = date(2024, 7, 1) | |
| max_date = date.today() | |
| date_range = st.date_input( | |
| "Date Range", | |
| value=(min_date, max_date), | |
| min_value=min_date, | |
| max_value=max_date, | |
| ) | |
| if isinstance(date_range, tuple) and len(date_range) == 2: | |
| date_filter = date_range | |
| else: | |
| date_filter = (min_date, max_date) | |
| # Project filter | |
| all_projects = sorted(jobs["project_code"].dropna().unique().tolist()) | |
| selected_projects = st.multiselect("Projects", all_projects, default=all_projects) | |
| # User filter | |
| all_users = sorted(jobs["username"].dropna().unique().tolist()) | |
| selected_users = st.multiselect("Users", all_users, default=all_users) | |
| # System filter | |
| all_systems = sorted(jobs["system_code"].dropna().unique().tolist()) | |
| selected_systems = st.multiselect("Systems", all_systems, default=all_systems) | |
| # --- Apply filters --- | |
| filtered = filter_jobs( | |
| jobs, | |
| date_range=date_filter, | |
| projects=selected_projects, | |
| users=selected_users, | |
| systems=selected_systems, | |
| ) | |
| # --- Metric cards (always visible above tabs) --- | |
| total_spend = filtered["dollars_used"].sum() | |
| total_jobs = len(filtered) | |
| # Batch completion % | |
| is_batch = ~filtered["launch_method"].isin(INTERACTIVE_METHODS) | |
| batch_jobs = filtered[is_batch] | |
| batch_total = len(batch_jobs) | |
| batch_completed = (batch_jobs["last_state"] == "COMPLETED").sum() | |
| batch_completion_pct = batch_completed / batch_total * 100 if batch_total > 0 else 0 | |
| # Interactive spend % | |
| interactive_dollars = filtered[filtered["launch_method"].isin(INTERACTIVE_METHODS)][ | |
| "dollars_used" | |
| ].sum() | |
| interactive_pct = interactive_dollars / total_spend * 100 if total_spend > 0 else 0 | |
| m1, m2, m3, m4 = st.columns(4) | |
| m1.metric("Total Spend", f"${total_spend:,.2f}", help="Sum of all job costs in the filtered period") | |
| m2.metric("Total Jobs", f"{total_jobs:,}", help="Number of jobs in the filtered period") | |
| m3.metric( | |
| "Batch Completion %", | |
| f"{batch_completion_pct:.1f}%", | |
| help="% of batch jobs that completed successfully (interactive sessions excluded — they never exit as COMPLETED)", | |
| ) | |
| m4.metric( | |
| "Interactive Spend %", | |
| f"{interactive_pct:.1f}%", | |
| help="% of total dollars spent on interactive sessions (Jupyter, Desktop, Code Server, etc.)", | |
| ) | |
| st.divider() | |
| # --- 5 tabs --- | |
| tab_overview, tab_spend, tab_health, tab_user, tab_data = st.tabs( | |
| ["Overview", "Spend Analysis", "Job Health", "User Detail", "Raw Data"] | |
| ) | |
| # === Overview tab === | |
| with tab_overview: | |
| # Budget gauges per project | |
| gauge_cols = st.columns(len(PROJECT_CODES)) | |
| for col, proj in zip(gauge_cols, PROJECT_CODES): | |
| with col: | |
| proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum() | |
| alloc = ALLOCATIONS.get(proj, 0) | |
| if alloc > 0: | |
| fig = chart_budget_gauge(proj, proj_spend, alloc) | |
| st.plotly_chart(fig, use_container_width=True) | |
| else: | |
| st.metric(proj, f"${proj_spend:,.2f}", help="No allocation data") | |
| # Per-project balance from snapshots | |
| latest_snapshots = snapshots.sort_values("snapshot_date").groupby("project_code").last() | |
| snap_cols = st.columns(len(PROJECT_CODES)) | |
| for col, proj in zip(snap_cols, PROJECT_CODES): | |
| with col: | |
| if proj in latest_snapshots.index: | |
| row = latest_snapshots.loc[proj] | |
| balance = row.get("current_balance", None) | |
| balance_str = f"${balance:,.2f}" if balance is not None else "N/A" | |
| proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum() | |
| st.metric( | |
| f"{proj} Balance", | |
| balance_str, | |
| delta=f"-${proj_spend:,.2f} spent", | |
| delta_color="inverse", | |
| ) | |
| # Burn rate (full width) | |
| fig = chart_burn_rate(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # === Spend Analysis tab === | |
| with tab_spend: | |
| left, right = st.columns(2) | |
| with left: | |
| fig = chart_usage_by_system(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right: | |
| fig = chart_dollars_by_user(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| left2, right2 = st.columns(2) | |
| with left2: | |
| fig = chart_launch_method_dollars(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right2: | |
| fig = chart_launch_method_count(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Daily usage (full width with range slider) | |
| fig = chart_daily_usage(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # === Job Health tab === | |
| with tab_health: | |
| left, right = st.columns(2) | |
| with left: | |
| fig = chart_job_outcomes(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right: | |
| fig = chart_spend_by_outcome(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| left2, right2 = st.columns(2) | |
| with left2: | |
| fig = chart_outcome_breakdown(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right2: | |
| fig = chart_efficiency_scatter(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| left3, right3 = st.columns(2) | |
| with left3: | |
| fig = chart_queue_efficiency(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right3: | |
| fig = chart_duration_distribution(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Resource sizing (full width) | |
| fig = chart_resource_sizing(filtered) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # === User Detail tab === | |
| with tab_user: | |
| users_in_data = sorted(filtered["username"].dropna().unique().tolist()) | |
| if not users_in_data: | |
| st.info("No users in filtered data.") | |
| else: | |
| selected_user = st.selectbox("Select User", users_in_data) | |
| user_df = filtered[filtered["username"] == selected_user] | |
| # User metric cards | |
| u_spend = user_df["dollars_used"].sum() | |
| u_jobs = len(user_df) | |
| u_batch = user_df[~user_df["launch_method"].isin(INTERACTIVE_METHODS)] | |
| u_batch_total = len(u_batch) | |
| u_batch_completed = (u_batch["last_state"] == "COMPLETED").sum() | |
| u_batch_pct = u_batch_completed / u_batch_total * 100 if u_batch_total > 0 else 0 | |
| um1, um2, um3 = st.columns(3) | |
| um1.metric("User Spend", f"${u_spend:,.2f}") | |
| um2.metric("User Jobs", f"{u_jobs:,}") | |
| um3.metric("Batch Completion %", f"{u_batch_pct:.1f}%") | |
| # User charts | |
| left, right = st.columns(2) | |
| with left: | |
| fig = chart_outcome_breakdown(user_df) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right: | |
| fig = chart_efficiency_scatter(user_df) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| left2, right2 = st.columns(2) | |
| with left2: | |
| fig = chart_launch_method_dollars(user_df) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| with right2: | |
| fig = chart_resource_sizing(user_df) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Daily usage (full width) | |
| fig = chart_daily_usage(user_df) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # Duration distribution (full width) | |
| fig = chart_duration_distribution(user_df) | |
| if fig: | |
| st.plotly_chart(fig, use_container_width=True) | |
| # User job table | |
| st.subheader(f"{selected_user}'s Jobs ({u_jobs:,} rows)") | |
| display_cols = [ | |
| c | |
| for c in [ | |
| "job_id", | |
| "project_code", | |
| "system_code", | |
| "queue_name", | |
| "launch_method", | |
| "last_state", | |
| "outcome_category", | |
| "walltime_hours", | |
| "dollars_used", | |
| "end_time", | |
| ] | |
| if c in user_df.columns | |
| ] | |
| user_display = user_df[display_cols].sort_values("end_time", ascending=False) | |
| col_config = {} | |
| if "dollars_used" in user_display.columns: | |
| col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f") | |
| if "walltime_hours" in user_display.columns: | |
| col_config["walltime_hours"] = st.column_config.NumberColumn( | |
| "Walltime (hrs)", format="%.1f" | |
| ) | |
| st.dataframe( | |
| user_display, use_container_width=True, column_config=col_config, hide_index=True | |
| ) | |
| # === Raw Data tab === | |
| with tab_data: | |
| st.subheader(f"Filtered Jobs ({len(filtered):,} rows)") | |
| # Format for display | |
| display_cols = [ | |
| c | |
| for c in [ | |
| "job_id", | |
| "username", | |
| "project_code", | |
| "system_code", | |
| "queue_name", | |
| "launch_method", | |
| "last_state", | |
| "outcome_category", | |
| "walltime_hours", | |
| "dollars_used", | |
| "submit_time", | |
| "start_time", | |
| "end_time", | |
| ] | |
| if c in filtered.columns | |
| ] | |
| display_df = filtered[display_cols].copy() | |
| # Format columns for readability | |
| col_config = {} | |
| if "dollars_used" in display_df.columns: | |
| col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f") | |
| if "walltime_hours" in display_df.columns: | |
| col_config["walltime_hours"] = st.column_config.NumberColumn( | |
| "Walltime (hrs)", format="%.1f" | |
| ) | |
| st.dataframe(display_df, use_container_width=True, column_config=col_config, hide_index=True) | |
| # Download button | |
| csv = filtered[display_cols].to_csv(index=False) | |
| st.download_button( | |
| label="Download filtered data as CSV", | |
| data=csv, | |
| file_name="osc_usage_filtered.csv", | |
| mime="text/csv", | |
| ) | |
| st.caption(f"Data from OSCusage CLI | {metadata.get('job_count', '?')} jobs in dataset") | |