buckeyeguy's picture
Upload app.py with huggingface_hub
fa58da8 verified
"""OSC Usage Dashboard — Streamlit app with Plotly charts, organized in tabs."""
from __future__ import annotations
from datetime import date, datetime, timedelta
import streamlit as st
from charts import (
chart_budget_gauge,
chart_burn_rate,
chart_daily_usage,
chart_dollars_by_user,
chart_duration_distribution,
chart_efficiency_scatter,
chart_job_outcomes,
chart_launch_method_count,
chart_launch_method_dollars,
chart_outcome_breakdown,
chart_queue_efficiency,
chart_resource_sizing,
chart_spend_by_outcome,
chart_usage_by_system,
)
from config import ALLOCATIONS, INTERACTIVE_METHODS, PROJECT_CODES
from data_loader import filter_jobs, load_data
st.set_page_config(page_title="OSC Usage Dashboard", layout="wide")
# --- Load data ---
jobs, snapshots, metadata = load_data()
# --- Sidebar ---
with st.sidebar:
st.title("OSC Usage Dashboard")
# Freshness badge
pushed_at = metadata.get("pushed_at", "")
if pushed_at:
pushed_dt = datetime.fromisoformat(pushed_at)
age = datetime.now() - pushed_dt
if age > timedelta(hours=24):
st.warning(f"Data is {age.days}d {age.seconds // 3600}h old")
else:
st.success(f"Updated {pushed_at[:16]}")
else:
st.info("No timestamp in metadata")
if st.button("Reload Data"):
st.cache_data.clear()
st.rerun()
st.divider()
# Date range
all_dates = jobs["end_date"].dropna()
if not all_dates.empty:
min_date = all_dates.min()
max_date = all_dates.max()
else:
min_date = date(2024, 7, 1)
max_date = date.today()
date_range = st.date_input(
"Date Range",
value=(min_date, max_date),
min_value=min_date,
max_value=max_date,
)
if isinstance(date_range, tuple) and len(date_range) == 2:
date_filter = date_range
else:
date_filter = (min_date, max_date)
# Project filter
all_projects = sorted(jobs["project_code"].dropna().unique().tolist())
selected_projects = st.multiselect("Projects", all_projects, default=all_projects)
# User filter
all_users = sorted(jobs["username"].dropna().unique().tolist())
selected_users = st.multiselect("Users", all_users, default=all_users)
# System filter
all_systems = sorted(jobs["system_code"].dropna().unique().tolist())
selected_systems = st.multiselect("Systems", all_systems, default=all_systems)
# --- Apply filters ---
filtered = filter_jobs(
jobs,
date_range=date_filter,
projects=selected_projects,
users=selected_users,
systems=selected_systems,
)
# --- Metric cards (always visible above tabs) ---
total_spend = filtered["dollars_used"].sum()
total_jobs = len(filtered)
# Batch completion %
is_batch = ~filtered["launch_method"].isin(INTERACTIVE_METHODS)
batch_jobs = filtered[is_batch]
batch_total = len(batch_jobs)
batch_completed = (batch_jobs["last_state"] == "COMPLETED").sum()
batch_completion_pct = batch_completed / batch_total * 100 if batch_total > 0 else 0
# Interactive spend %
interactive_dollars = filtered[filtered["launch_method"].isin(INTERACTIVE_METHODS)][
"dollars_used"
].sum()
interactive_pct = interactive_dollars / total_spend * 100 if total_spend > 0 else 0
m1, m2, m3, m4 = st.columns(4)
m1.metric("Total Spend", f"${total_spend:,.2f}", help="Sum of all job costs in the filtered period")
m2.metric("Total Jobs", f"{total_jobs:,}", help="Number of jobs in the filtered period")
m3.metric(
"Batch Completion %",
f"{batch_completion_pct:.1f}%",
help="% of batch jobs that completed successfully (interactive sessions excluded — they never exit as COMPLETED)",
)
m4.metric(
"Interactive Spend %",
f"{interactive_pct:.1f}%",
help="% of total dollars spent on interactive sessions (Jupyter, Desktop, Code Server, etc.)",
)
st.divider()
# --- 5 tabs ---
tab_overview, tab_spend, tab_health, tab_user, tab_data = st.tabs(
["Overview", "Spend Analysis", "Job Health", "User Detail", "Raw Data"]
)
# === Overview tab ===
with tab_overview:
# Budget gauges per project
gauge_cols = st.columns(len(PROJECT_CODES))
for col, proj in zip(gauge_cols, PROJECT_CODES):
with col:
proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum()
alloc = ALLOCATIONS.get(proj, 0)
if alloc > 0:
fig = chart_budget_gauge(proj, proj_spend, alloc)
st.plotly_chart(fig, use_container_width=True)
else:
st.metric(proj, f"${proj_spend:,.2f}", help="No allocation data")
# Per-project balance from snapshots
latest_snapshots = snapshots.sort_values("snapshot_date").groupby("project_code").last()
snap_cols = st.columns(len(PROJECT_CODES))
for col, proj in zip(snap_cols, PROJECT_CODES):
with col:
if proj in latest_snapshots.index:
row = latest_snapshots.loc[proj]
balance = row.get("current_balance", None)
balance_str = f"${balance:,.2f}" if balance is not None else "N/A"
proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum()
st.metric(
f"{proj} Balance",
balance_str,
delta=f"-${proj_spend:,.2f} spent",
delta_color="inverse",
)
# Burn rate (full width)
fig = chart_burn_rate(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
# === Spend Analysis tab ===
with tab_spend:
left, right = st.columns(2)
with left:
fig = chart_usage_by_system(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right:
fig = chart_dollars_by_user(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
left2, right2 = st.columns(2)
with left2:
fig = chart_launch_method_dollars(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right2:
fig = chart_launch_method_count(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
# Daily usage (full width with range slider)
fig = chart_daily_usage(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
# === Job Health tab ===
with tab_health:
left, right = st.columns(2)
with left:
fig = chart_job_outcomes(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right:
fig = chart_spend_by_outcome(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
left2, right2 = st.columns(2)
with left2:
fig = chart_outcome_breakdown(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right2:
fig = chart_efficiency_scatter(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
left3, right3 = st.columns(2)
with left3:
fig = chart_queue_efficiency(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right3:
fig = chart_duration_distribution(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
# Resource sizing (full width)
fig = chart_resource_sizing(filtered)
if fig:
st.plotly_chart(fig, use_container_width=True)
# === User Detail tab ===
with tab_user:
users_in_data = sorted(filtered["username"].dropna().unique().tolist())
if not users_in_data:
st.info("No users in filtered data.")
else:
selected_user = st.selectbox("Select User", users_in_data)
user_df = filtered[filtered["username"] == selected_user]
# User metric cards
u_spend = user_df["dollars_used"].sum()
u_jobs = len(user_df)
u_batch = user_df[~user_df["launch_method"].isin(INTERACTIVE_METHODS)]
u_batch_total = len(u_batch)
u_batch_completed = (u_batch["last_state"] == "COMPLETED").sum()
u_batch_pct = u_batch_completed / u_batch_total * 100 if u_batch_total > 0 else 0
um1, um2, um3 = st.columns(3)
um1.metric("User Spend", f"${u_spend:,.2f}")
um2.metric("User Jobs", f"{u_jobs:,}")
um3.metric("Batch Completion %", f"{u_batch_pct:.1f}%")
# User charts
left, right = st.columns(2)
with left:
fig = chart_outcome_breakdown(user_df)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right:
fig = chart_efficiency_scatter(user_df)
if fig:
st.plotly_chart(fig, use_container_width=True)
left2, right2 = st.columns(2)
with left2:
fig = chart_launch_method_dollars(user_df)
if fig:
st.plotly_chart(fig, use_container_width=True)
with right2:
fig = chart_resource_sizing(user_df)
if fig:
st.plotly_chart(fig, use_container_width=True)
# Daily usage (full width)
fig = chart_daily_usage(user_df)
if fig:
st.plotly_chart(fig, use_container_width=True)
# Duration distribution (full width)
fig = chart_duration_distribution(user_df)
if fig:
st.plotly_chart(fig, use_container_width=True)
# User job table
st.subheader(f"{selected_user}'s Jobs ({u_jobs:,} rows)")
display_cols = [
c
for c in [
"job_id",
"project_code",
"system_code",
"queue_name",
"launch_method",
"last_state",
"outcome_category",
"walltime_hours",
"dollars_used",
"end_time",
]
if c in user_df.columns
]
user_display = user_df[display_cols].sort_values("end_time", ascending=False)
col_config = {}
if "dollars_used" in user_display.columns:
col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f")
if "walltime_hours" in user_display.columns:
col_config["walltime_hours"] = st.column_config.NumberColumn(
"Walltime (hrs)", format="%.1f"
)
st.dataframe(
user_display, use_container_width=True, column_config=col_config, hide_index=True
)
# === Raw Data tab ===
with tab_data:
st.subheader(f"Filtered Jobs ({len(filtered):,} rows)")
# Format for display
display_cols = [
c
for c in [
"job_id",
"username",
"project_code",
"system_code",
"queue_name",
"launch_method",
"last_state",
"outcome_category",
"walltime_hours",
"dollars_used",
"submit_time",
"start_time",
"end_time",
]
if c in filtered.columns
]
display_df = filtered[display_cols].copy()
# Format columns for readability
col_config = {}
if "dollars_used" in display_df.columns:
col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f")
if "walltime_hours" in display_df.columns:
col_config["walltime_hours"] = st.column_config.NumberColumn(
"Walltime (hrs)", format="%.1f"
)
st.dataframe(display_df, use_container_width=True, column_config=col_config, hide_index=True)
# Download button
csv = filtered[display_cols].to_csv(index=False)
st.download_button(
label="Download filtered data as CSV",
data=csv,
file_name="osc_usage_filtered.csv",
mime="text/csv",
)
st.caption(f"Data from OSCusage CLI | {metadata.get('job_count', '?')} jobs in dataset")