osc-usage-dashboard / charts.py
buckeyeguy's picture
Upload charts.py with huggingface_hub
53fe9eb verified
"""Plotly chart functions for the OSC Usage Dashboard."""
from __future__ import annotations
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from config import ALLOCATIONS, COLORS, FISCAL_YEAR_END
def _hex_to_rgba(hex_color: str, alpha: float) -> str:
    """Convert a hex color string to a CSS ``rgba(R,G,B,alpha)`` string.

    Accepts the six-digit form (``'#RRGGBB'``) and the three-digit CSS
    shorthand (``'#RGB'``, where each digit is doubled). Leading ``#``
    characters are optional.

    Args:
        hex_color: Hex color, with or without a leading ``#``.
        alpha: Opacity value, interpolated into the result unchanged.

    Returns:
        A string of the form ``'rgba(R,G,B,alpha)'`` with decimal channels.

    Raises:
        ValueError: If a channel is not valid hexadecimal.
    """
    digits = hex_color.lstrip("#")
    if len(digits) == 3:
        # CSS shorthand: '#abc' means '#aabbcc'.
        digits = "".join(ch * 2 for ch in digits)
    r, g, b = (int(digits[i : i + 2], 16) for i in (0, 2, 4))
    return f"rgba({r},{g},{b},{alpha})"
def chart_usage_by_system(df: pd.DataFrame) -> go.Figure | None:
    """Build a grouped bar chart of dollars per cluster, split by subtype.

    ``dollars_used`` is summed for every (``system_code``, ``subtype_code``)
    pair; each system gets one group of bars, colored by subtype using
    ``COLORS["cpu_gpu"]``.

    Returns:
        The bar figure, or ``None`` when ``df`` has no rows.
    """
    if df.empty:
        return None

    totals = (
        df.groupby(["system_code", "subtype_code"])["dollars_used"]
        .sum()
        .reset_index()
    )
    axis_labels = {
        "system_code": "Cluster",
        "dollars_used": "Dollars",
        "subtype_code": "Type",
    }
    figure = px.bar(
        totals,
        x="system_code",
        y="dollars_used",
        color="subtype_code",
        barmode="group",
        color_discrete_map=COLORS["cpu_gpu"],
        labels=axis_labels,
        title="Usage by System (CPU vs GPU)",
    )
    figure.update_yaxes(tickprefix="$")
    # Custom hover text: cluster on the first line, trace name and amount below.
    hover = "Cluster: %{x}<br>%{data.name}: $%{y:,.2f}<extra></extra>"
    figure.update_traces(hovertemplate=hover)
    return figure
def chart_daily_usage(df: pd.DataFrame) -> go.Figure | None:
    """Build a per-system line chart of dollars by ``end_date``.

    Rows without an ``end_date`` are ignored. The x-axis carries a range
    slider plus quick-select buttons (1m, 3m, YTD, All).

    Returns:
        The line figure, or ``None`` when no row has an ``end_date``.
    """
    dated = df.dropna(subset=["end_date"])
    if dated.empty:
        return None

    per_day = (
        dated.groupby(["end_date", "system_code"])["dollars_used"]
        .sum()
        .reset_index()
    )
    figure = px.line(
        per_day,
        x="end_date",
        y="dollars_used",
        color="system_code",
        markers=True,
        color_discrete_map=COLORS["systems"],
        labels={
            "end_date": "Date",
            "dollars_used": "Dollars",
            "system_code": "System",
        },
        title="Daily Usage by System",
    )
    figure.update_yaxes(tickprefix="$")

    # Quick-select windows shown above the plot.
    range_buttons = [
        {"count": 1, "label": "1m", "step": "month", "stepmode": "backward"},
        {"count": 3, "label": "3m", "step": "month", "stepmode": "backward"},
        {"label": "YTD", "step": "year", "stepmode": "todate"},
        {"label": "All", "step": "all"},
    ]
    figure.update_xaxes(
        rangeslider_visible=True,
        rangeselector={"buttons": range_buttons},
    )
    figure.update_layout(height=500)
    return figure
def chart_dollars_by_user(df: pd.DataFrame) -> go.Figure | None:
    """Build a pie chart of total ``dollars_used`` per ``username``.

    Users are ordered from largest to smallest total before plotting, and
    each slice is labelled with the username and its dollar amount.

    Returns:
        The pie figure, or ``None`` when ``df`` has no rows.
    """
    if df.empty:
        return None

    per_user = df.groupby("username")["dollars_used"].sum()
    ranked = per_user.reset_index().sort_values("dollars_used", ascending=False)

    figure = px.pie(
        ranked,
        names="username",
        values="dollars_used",
        color_discrete_sequence=COLORS["pie"],
        title="Dollars by User",
    )
    figure.update_traces(
        textinfo="label+value",
        texttemplate="%{label}: $%{value:,.2f}",
    )
    return figure
def chart_job_outcomes(df: pd.DataFrame) -> go.Figure | None:
    """Build a stacked bar chart of job counts per month, split by state.

    Rows without an ``end_month`` are ignored; the remaining rows are counted
    per (``end_month``, ``last_state``) pair.

    Returns:
        The bar figure, or ``None`` when no row has an ``end_month``.
    """
    with_month = df.dropna(subset=["end_month"])
    if with_month.empty:
        return None

    counts = (
        with_month.groupby(["end_month", "last_state"])
        .size()
        .reset_index(name="count")
    )
    axis_labels = {
        "end_month": "Month",
        "count": "Job Count",
        "last_state": "State",
    }
    figure = px.bar(
        counts,
        x="end_month",
        y="count",
        color="last_state",
        barmode="stack",
        color_discrete_map=COLORS["states"],
        labels=axis_labels,
        title="Job Outcomes by Month",
    )
    hover = "%{x}<br>%{data.name}: %{y:,} jobs<extra></extra>"
    figure.update_traces(hovertemplate=hover)
    return figure
def chart_burn_rate(df: pd.DataFrame) -> go.Figure | None:
    """Plot cumulative spend per project with a projection and allocation line.

    For every ``project_code`` found in rows that have an ``end_date``:

    * a filled line of cumulative ``dollars_used`` by ``end_date``;
    * when there are at least two dates spanning more than zero days and
      ``FISCAL_YEAR_END`` is still ahead of the last date, a dashed straight
      line from the last cumulative value to the projected total at
      ``FISCAL_YEAR_END`` (cumulative spend / days elapsed * days remaining);
    * when ``ALLOCATIONS`` holds a truthy amount for the project, a dotted
      horizontal line at that amount labelled with the project and amount.

    Args:
        df: Job records with ``end_date``, ``project_code`` and
            ``dollars_used`` columns.

    Returns:
        The figure, or ``None`` when no row has an ``end_date``.
    """
    valid = df.dropna(subset=["end_date"])
    if valid.empty:
        return None

    fig = go.Figure()
    for proj in sorted(valid["project_code"].unique()):
        proj_df = valid[valid["project_code"] == proj]
        daily = proj_df.groupby("end_date")["dollars_used"].sum().sort_index()
        cumulative = daily.cumsum()
        color = COLORS["projects"].get(proj, "#999999")
        dates = cumulative.index.tolist()
        vals = cumulative.values.tolist()

        fig.add_trace(
            go.Scatter(
                x=dates,
                y=vals,
                mode="lines",
                name=proj,
                line=dict(color=color, width=2),
                fill="tozeroy",
                fillcolor=_hex_to_rgba(color, 0.15),
                hovertemplate=(
                    "%{x|%b %d, %Y}<br>Cumulative: $%{y:,.2f}"
                    "<extra>%{fullData.name}</extra>"
                ),
            )
        )

        # Projection line. A rate needs at least two distinct dates.
        # NOTE(review): assumes end_date values and FISCAL_YEAR_END are
        # date-like objects that can be subtracted from each other - confirm
        # against config and the data loader.
        if len(dates) >= 2:
            last_date = dates[-1]
            last_cum = vals[-1]
            days_elapsed = (last_date - dates[0]).days
            if days_elapsed > 0:
                daily_rate = last_cum / days_elapsed
                days_remaining = (FISCAL_YEAR_END - last_date).days
                if days_remaining > 0:
                    projected_total = last_cum + daily_rate * days_remaining
                    fig.add_trace(
                        go.Scatter(
                            x=[last_date, FISCAL_YEAR_END],
                            y=[last_cum, projected_total],
                            mode="lines",
                            # Named so hover does not show a generic "trace N".
                            name=f"{proj} (projected)",
                            line=dict(color=color, dash="dash", width=1.5),
                            showlegend=False,
                            opacity=0.6,
                            hovertemplate=(
                                "%{x|%b %d, %Y}<br>Projected: $%{y:,.2f}"
                                "<extra>%{fullData.name}</extra>"
                            ),
                        )
                    )

        # Allocation ceiling
        alloc = ALLOCATIONS.get(proj)
        if alloc:
            fig.add_hline(
                y=alloc,
                line_dash="dot",
                line_color=color,
                opacity=0.5,
                annotation_text=f"{proj}: ${alloc:,.0f}",
                annotation_position="top left",
            )

    fig.update_layout(
        title="Budget Burn Rate (with projection to June 30)",
        xaxis_title="Date",
        yaxis_title="Cumulative Dollars",
        yaxis_tickprefix="$",
        margin=dict(b=80),
    )
    # Append the footnote with add_annotation. Passing ``annotations=[...]`` to
    # update_layout merges element-wise into annotations that already exist,
    # which would overwrite the first allocation label created by add_hline.
    fig.add_annotation(
        text="Dashed line = linear projection (total spend / days elapsed × days remaining)",
        xref="paper",
        yref="paper",
        x=0,
        y=-0.12,
        showarrow=False,
        font=dict(size=11, color="#666666"),
    )
    return fig
def chart_spend_by_outcome(df: pd.DataFrame) -> go.Figure | None:
    """Build horizontal bars of dollars per outcome, one facet per project.

    Outcomes come from ``outcome_category`` when that column exists and from
    ``last_state`` otherwise. Colors are looked up in the union of
    ``COLORS["states"]`` and ``COLORS["outcome_categories"]``, with the
    latter winning on shared keys.

    Returns:
        The faceted bar figure, or ``None`` when ``df`` has no rows.
    """
    if df.empty:
        return None

    if "outcome_category" in df.columns:
        outcome_col = "outcome_category"
    else:
        outcome_col = "last_state"

    palette = dict(COLORS["states"])
    palette.update(COLORS["outcome_categories"])

    spend = (
        df.groupby(["project_code", outcome_col])["dollars_used"]
        .sum()
        .reset_index()
        # Normalise the outcome column name so the plotting call is uniform.
        .rename(columns={outcome_col: "state"})
        .sort_values("dollars_used", ascending=True)
    )

    figure = px.bar(
        spend,
        y="state",
        x="dollars_used",
        color="state",
        facet_col="project_code",
        orientation="h",
        color_discrete_map=palette,
        labels={
            "state": "Outcome",
            "dollars_used": "Dollars",
            "project_code": "Project",
        },
        title="Spend by Outcome",
    )
    figure.update_xaxes(tickprefix="$")
    figure.update_layout(showlegend=False)
    return figure
def chart_queue_efficiency(df: pd.DataFrame) -> go.Figure | None:
    """Build a dual-axis chart of cost and completion rate per queue.

    Per ``queue_name`` this computes the count of ``job_id``, the mean of
    ``dollars_used`` and the number of rows whose ``last_state`` equals
    ``"COMPLETED"``. Queues with fewer than 10 jobs are dropped. Bars (left
    axis) show the average dollars per job, annotated with the job count; a
    line (right axis) shows the completion percentage. Queues are ordered by
    average dollars, highest first.

    Returns:
        The figure, or ``None`` when ``df`` is empty or no queue has at
        least 10 jobs.
    """
    if df.empty:
        return None

    def _count_completed(states):
        # Number of jobs in the group that finished in the COMPLETED state.
        return (states == "COMPLETED").sum()

    per_queue = (
        df.groupby("queue_name")
        .agg(
            job_count=("job_id", "count"),
            avg_dollars=("dollars_used", "mean"),
            completed=("last_state", _count_completed),
        )
        .reset_index()
    )

    min_jobs = 10
    per_queue = per_queue.loc[per_queue["job_count"] >= min_jobs].copy()
    if per_queue.empty:
        return None

    per_queue["completion_pct"] = (
        per_queue["completed"] / per_queue["job_count"] * 100
    )
    per_queue = per_queue.sort_values("avg_dollars", ascending=False)

    cost_bars = go.Bar(
        x=per_queue["queue_name"],
        y=per_queue["avg_dollars"],
        name="Avg $/job",
        marker_color="#2176AE",
        opacity=0.7,
        text=[f"n={jobs}" for jobs in per_queue["job_count"]],
        textposition="outside",
    )
    completion_line = go.Scatter(
        x=per_queue["queue_name"],
        y=per_queue["completion_pct"],
        name="Completion %",
        mode="lines+markers",
        line={"color": "#E63946", "width": 2},
        marker={"size": 8},
    )

    figure = make_subplots(specs=[[{"secondary_y": True}]])
    figure.add_trace(cost_bars, secondary_y=False)
    figure.add_trace(completion_line, secondary_y=True)
    figure.update_yaxes(
        title_text="Avg Dollars per Job", tickprefix="$", secondary_y=False
    )
    figure.update_yaxes(
        title_text="Completion Rate (%)", range=[0, 105], secondary_y=True
    )
    figure.update_layout(title="Queue Efficiency: Cost vs Completion Rate")
    return figure
def chart_launch_method_dollars(df: pd.DataFrame) -> go.Figure | None:
    """Build a stacked bar chart of dollars per project, split by launch method.

    ``dollars_used`` is summed for every (``project_code``,
    ``launch_method``) pair.

    Returns:
        The bar figure, or ``None`` when ``df`` has no rows.
    """
    if df.empty:
        return None

    spend = (
        df.groupby(["project_code", "launch_method"])["dollars_used"]
        .sum()
        .reset_index()
    )
    axis_labels = {
        "project_code": "Project",
        "dollars_used": "Dollars",
        "launch_method": "Method",
    }
    figure = px.bar(
        spend,
        x="project_code",
        y="dollars_used",
        color="launch_method",
        barmode="stack",
        color_discrete_map=COLORS["launch_methods"],
        labels=axis_labels,
        title="Spend by Launch Method",
    )
    figure.update_yaxes(tickprefix="$")
    return figure
def chart_launch_method_count(df: pd.DataFrame) -> go.Figure | None:
    """Build a stacked bar chart of job counts per project, split by launch method.

    Rows are counted for every (``project_code``, ``launch_method``) pair.

    Returns:
        The bar figure, or ``None`` when ``df`` has no rows.
    """
    if df.empty:
        return None

    counts = (
        df.groupby(["project_code", "launch_method"])
        .size()
        .reset_index(name="count")
    )
    axis_labels = {
        "project_code": "Project",
        "count": "Job Count",
        "launch_method": "Method",
    }
    return px.bar(
        counts,
        x="project_code",
        y="count",
        color="launch_method",
        barmode="stack",
        color_discrete_map=COLORS["launch_methods"],
        labels=axis_labels,
        title="Jobs by Launch Method",
    )
def chart_duration_distribution(df: pd.DataFrame) -> go.Figure | None:
    """Build a 30-bin histogram of ``walltime_hours``, colored by outcome.

    Only rows with a non-null, strictly positive ``walltime_hours`` are
    plotted. Bars are colored by ``outcome_category`` when that column
    exists, otherwise by ``last_state``, using the matching ``COLORS`` map.

    Returns:
        The histogram figure, or ``None`` when no row qualifies.
    """
    walltime = df["walltime_hours"]
    usable = df[walltime.notna() & (walltime > 0)].copy()
    if usable.empty:
        return None

    if "outcome_category" in usable.columns:
        outcome_col = "outcome_category"
        palette = COLORS["outcome_categories"]
    else:
        outcome_col = "last_state"
        palette = COLORS["states"]

    return px.histogram(
        usable,
        x="walltime_hours",
        color=outcome_col,
        nbins=30,
        color_discrete_map=palette,
        labels={"walltime_hours": "Walltime (hours)", outcome_col: "Outcome"},
        title="Job Duration Distribution by Outcome",
    )
def chart_outcome_breakdown(df: pd.DataFrame) -> go.Figure | None:
    """Build a grouped bar chart of dollars per project, split by outcome category.

    ``dollars_used`` is summed for every (``project_code``,
    ``outcome_category``) pair.

    Returns:
        The bar figure, or ``None`` when ``df`` lacks an
        ``outcome_category`` column or has no rows.
    """
    if "outcome_category" not in df.columns or df.empty:
        return None

    spend = (
        df.groupby(["project_code", "outcome_category"])["dollars_used"]
        .sum()
        .reset_index()
    )
    axis_labels = {
        "project_code": "Project",
        "dollars_used": "Dollars",
        "outcome_category": "Outcome",
    }
    figure = px.bar(
        spend,
        x="project_code",
        y="dollars_used",
        color="outcome_category",
        barmode="group",
        color_discrete_map=COLORS["outcome_categories"],
        labels=axis_labels,
        title="Spend by Session Outcome",
    )
    figure.update_yaxes(tickprefix="$")
    return figure
def chart_efficiency_scatter(df: pd.DataFrame) -> go.Figure | None:
    """Build a log-log scatter of walltime against cost, colored by outcome.

    Only rows where both ``walltime_hours`` and ``dollars_used`` are non-null
    and strictly positive are plotted. Points are colored by
    ``outcome_category`` when that column exists, otherwise by
    ``last_state``; ``launch_method`` and ``queue_name`` are added to the
    hover data.

    Returns:
        The scatter figure, or ``None`` when no row qualifies.
    """
    walltime = df["walltime_hours"]
    cost = df["dollars_used"]
    plottable = walltime.notna() & (walltime > 0) & cost.notna() & (cost > 0)
    points = df[plottable].copy()
    if points.empty:
        return None

    if "outcome_category" in points.columns:
        outcome_col = "outcome_category"
        palette = COLORS["outcome_categories"]
    else:
        outcome_col = "last_state"
        palette = COLORS["states"]

    axis_labels = {
        "walltime_hours": "Walltime (hours)",
        "dollars_used": "Cost ($)",
        outcome_col: "Outcome",
    }
    return px.scatter(
        points,
        x="walltime_hours",
        y="dollars_used",
        color=outcome_col,
        color_discrete_map=palette,
        opacity=0.5,
        log_x=True,
        log_y=True,
        labels=axis_labels,
        title="Job Efficiency: Runtime vs Cost",
        hover_data=["launch_method", "queue_name"],
    )
def chart_resource_sizing(df: pd.DataFrame) -> go.Figure | None:
    """Build a box plot of ``cpu_count`` per launch method.

    Only rows whose ``launch_method`` is listed in
    ``config.INTERACTIVE_METHODS`` are included. The legend is hidden
    because the x-axis already names each method.

    Returns:
        The box-plot figure, or ``None`` when no row matches or the data has
        no ``cpu_count`` column.
    """
    from config import INTERACTIVE_METHODS

    is_interactive = df["launch_method"].isin(INTERACTIVE_METHODS)
    sessions = df[is_interactive].copy()
    if sessions.empty:
        return None
    if "cpu_count" not in sessions.columns:
        return None

    figure = px.box(
        sessions,
        x="launch_method",
        y="cpu_count",
        color="launch_method",
        color_discrete_map=COLORS["launch_methods"],
        labels={"launch_method": "Launch Method", "cpu_count": "CPU Count"},
        title="Resource Allocation: Interactive Sessions",
    )
    figure.update_layout(showlegend=False)
    return figure
def chart_budget_gauge(project: str, spent: float, allocation: float) -> go.Figure:
    """Build a single gauge showing a project's spend against its allocation.

    The gauge axis runs from 0 to the larger of ``allocation * 1.2`` and
    ``spent``, so the needle stays on scale when spend exceeds the
    allocation. Background bands cover 0-70%, 70-90% and 90%-max of the
    allocation, and a threshold line marks the allocation itself.

    Args:
        project: Text shown as the gauge title.
        spent: Dollars spent so far; displayed as the gauge value.
        allocation: Budget for the project, in dollars.

    Returns:
        A 250px-tall figure containing one indicator trace.
    """
    # Leave headroom above the allocation, but never clip an overspend.
    max_val = max(allocation * 1.2, spent)
    warn_from = allocation * 0.7
    danger_from = allocation * 0.9

    fig = go.Figure(
        go.Indicator(
            mode="gauge+number",
            value=spent,
            number={"prefix": "$", "valueformat": ",.0f"},
            title={"text": project},
            gauge={
                "axis": {"range": [0, max_val], "tickprefix": "$", "tickformat": ",.0f"},
                "bar": {"color": "#2176AE"},
                "steps": [
                    {"range": [0, warn_from], "color": "#D4EDDA"},
                    {"range": [warn_from, danger_from], "color": "#FFF3CD"},
                    {"range": [danger_from, max_val], "color": "#F8D7DA"},
                ],
                "threshold": {
                    "line": {"color": "#E63946", "width": 3},
                    "thickness": 0.75,
                    "value": allocation,
                },
            },
        )
    )
    fig.update_layout(height=250, margin=dict(t=60, b=20, l=30, r=30))
    return fig