buckeyeguy committed on
Commit
ecc00b8
·
verified ·
1 Parent(s): 53fe9eb

Consolidate dashboard to single app.py

Browse files
.ruff_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Automatically created by ruff.
2
+ *
.ruff_cache/0.15.2/10572574500376801884 ADDED
Binary file (197 Bytes). View file
 
.ruff_cache/0.15.2/11548578324518120789 ADDED
Binary file (62 Bytes). View file
 
.ruff_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1 @@
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
__pycache__/app.cpython-312.pyc ADDED
Binary file (13.1 kB). View file
 
__pycache__/charts.cpython-312.pyc ADDED
Binary file (7.23 kB). View file
 
__pycache__/components.cpython-312.pyc ADDED
Binary file (710 Bytes). View file
 
__pycache__/config.cpython-312.pyc ADDED
Binary file (625 Bytes). View file
 
__pycache__/data_loader.cpython-312.pyc ADDED
Binary file (2.08 kB). View file
 
app.py CHANGED
@@ -1,363 +1,331 @@
1
- """OSC Usage Dashboard — Streamlit app with Plotly charts, organized in tabs."""
2
 
3
  from __future__ import annotations
4
 
 
5
  from datetime import date, datetime, timedelta
 
6
 
 
 
 
7
  import streamlit as st
 
8
 
9
- from charts import (
10
- chart_budget_gauge,
11
- chart_burn_rate,
12
- chart_daily_usage,
13
- chart_dollars_by_user,
14
- chart_duration_distribution,
15
- chart_efficiency_scatter,
16
- chart_job_outcomes,
17
- chart_launch_method_count,
18
- chart_launch_method_dollars,
19
- chart_outcome_breakdown,
20
- chart_queue_efficiency,
21
- chart_resource_sizing,
22
- chart_spend_by_outcome,
23
- chart_usage_by_system,
24
- )
25
- from config import ALLOCATIONS, INTERACTIVE_METHODS, PROJECT_CODES
26
- from data_loader import filter_jobs, load_data
27
 
28
  st.set_page_config(page_title="OSC Usage Dashboard", layout="wide")
29
 
30
- # --- Load data ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  jobs, snapshots, metadata = load_data()
32
 
33
- # --- Sidebar ---
34
  with st.sidebar:
35
  st.title("OSC Usage Dashboard")
36
-
37
- # Freshness badge
38
  pushed_at = metadata.get("pushed_at", "")
39
  if pushed_at:
40
- pushed_dt = datetime.fromisoformat(pushed_at)
41
- age = datetime.now() - pushed_dt
42
  if age > timedelta(hours=24):
43
- st.warning(f"Data is {age.days}d {age.seconds // 3600}h old")
44
  else:
45
  st.success(f"Updated {pushed_at[:16]}")
46
- else:
47
- st.info("No timestamp in metadata")
48
-
49
  if st.button("Reload Data"):
50
  st.cache_data.clear()
51
  st.rerun()
52
-
53
  st.divider()
54
 
55
- # Date range
56
  all_dates = jobs["end_date"].dropna()
57
- if not all_dates.empty:
58
- min_date = all_dates.min()
59
- max_date = all_dates.max()
60
- else:
61
- min_date = date(2024, 7, 1)
62
- max_date = date.today()
63
-
64
  date_range = st.date_input(
65
- "Date Range",
66
- value=(min_date, max_date),
67
- min_value=min_date,
68
- max_value=max_date,
69
  )
70
- if isinstance(date_range, tuple) and len(date_range) == 2:
71
- date_filter = date_range
72
- else:
73
- date_filter = (min_date, max_date)
74
-
75
- # Project filter
76
- all_projects = sorted(jobs["project_code"].dropna().unique().tolist())
77
- selected_projects = st.multiselect("Projects", all_projects, default=all_projects)
78
-
79
- # User filter
80
- all_users = sorted(jobs["username"].dropna().unique().tolist())
81
- selected_users = st.multiselect("Users", all_users, default=all_users)
82
-
83
- # System filter
84
- all_systems = sorted(jobs["system_code"].dropna().unique().tolist())
85
- selected_systems = st.multiselect("Systems", all_systems, default=all_systems)
86
-
87
- # --- Apply filters ---
88
- filtered = filter_jobs(
89
- jobs,
90
- date_range=date_filter,
91
- projects=selected_projects,
92
- users=selected_users,
93
- systems=selected_systems,
94
- )
95
 
96
- # --- Metric cards (always visible above tabs) ---
97
  total_spend = filtered["dollars_used"].sum()
98
- total_jobs = len(filtered)
99
-
100
- # Batch completion %
101
- is_batch = ~filtered["launch_method"].isin(INTERACTIVE_METHODS)
102
- batch_jobs = filtered[is_batch]
103
- batch_total = len(batch_jobs)
104
- batch_completed = (batch_jobs["last_state"] == "COMPLETED").sum()
105
- batch_completion_pct = batch_completed / batch_total * 100 if batch_total > 0 else 0
106
-
107
- # Interactive spend %
108
- interactive_dollars = filtered[filtered["launch_method"].isin(INTERACTIVE_METHODS)][
109
- "dollars_used"
110
  ].sum()
111
- interactive_pct = interactive_dollars / total_spend * 100 if total_spend > 0 else 0
112
 
113
  m1, m2, m3, m4 = st.columns(4)
114
- m1.metric("Total Spend", f"${total_spend:,.2f}", help="Sum of all job costs in the filtered period")
115
- m2.metric("Total Jobs", f"{total_jobs:,}", help="Number of jobs in the filtered period")
116
- m3.metric(
117
- "Batch Completion %",
118
- f"{batch_completion_pct:.1f}%",
119
- help="% of batch jobs that completed successfully (interactive sessions excluded — they never exit as COMPLETED)",
120
- )
121
  m4.metric(
122
- "Interactive Spend %",
123
- f"{interactive_pct:.1f}%",
124
- help="% of total dollars spent on interactive sessions (Jupyter, Desktop, Code Server, etc.)",
125
  )
126
-
127
  st.divider()
128
 
129
- # --- 5 tabs ---
 
130
  tab_overview, tab_spend, tab_health, tab_user, tab_data = st.tabs(
131
  ["Overview", "Spend Analysis", "Job Health", "User Detail", "Raw Data"]
132
  )
133
 
134
- # === Overview tab ===
135
  with tab_overview:
136
- # Budget gauges per project
137
- gauge_cols = st.columns(len(PROJECT_CODES))
138
- for col, proj in zip(gauge_cols, PROJECT_CODES):
139
  with col:
140
- proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum()
141
  alloc = ALLOCATIONS.get(proj, 0)
142
  if alloc > 0:
143
- fig = chart_budget_gauge(proj, proj_spend, alloc)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  st.plotly_chart(fig, use_container_width=True)
145
- else:
146
- st.metric(proj, f"${proj_spend:,.2f}", help="No allocation data")
147
 
148
- # Per-project balance from snapshots
149
- latest_snapshots = snapshots.sort_values("snapshot_date").groupby("project_code").last()
150
- snap_cols = st.columns(len(PROJECT_CODES))
151
- for col, proj in zip(snap_cols, PROJECT_CODES):
152
  with col:
153
- if proj in latest_snapshots.index:
154
- row = latest_snapshots.loc[proj]
155
- balance = row.get("current_balance", None)
156
- balance_str = f"${balance:,.2f}" if balance is not None else "N/A"
157
- proj_spend = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum()
158
  st.metric(
159
  f"{proj} Balance",
160
- balance_str,
161
- delta=f"-${proj_spend:,.2f} spent",
162
  delta_color="inverse",
163
  )
164
 
165
- # Burn rate (full width)
166
- fig = chart_burn_rate(filtered)
167
- if fig:
 
 
 
 
 
 
168
  st.plotly_chart(fig, use_container_width=True)
169
 
170
- # === Spend Analysis tab ===
171
  with tab_spend:
172
  left, right = st.columns(2)
173
  with left:
174
- fig = chart_usage_by_system(filtered)
175
- if fig:
176
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
177
  with right:
178
- fig = chart_dollars_by_user(filtered)
179
- if fig:
180
- st.plotly_chart(fig, use_container_width=True)
181
-
182
- left2, right2 = st.columns(2)
183
- with left2:
184
- fig = chart_launch_method_dollars(filtered)
185
- if fig:
186
- st.plotly_chart(fig, use_container_width=True)
187
- with right2:
188
- fig = chart_launch_method_count(filtered)
189
- if fig:
190
- st.plotly_chart(fig, use_container_width=True)
191
-
192
- # Daily usage (full width with range slider)
193
- fig = chart_daily_usage(filtered)
194
- if fig:
195
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
196
 
197
- # === Job Health tab ===
198
  with tab_health:
199
  left, right = st.columns(2)
200
  with left:
201
- fig = chart_job_outcomes(filtered)
202
- if fig:
203
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
204
  with right:
205
- fig = chart_spend_by_outcome(filtered)
206
- if fig:
207
- st.plotly_chart(fig, use_container_width=True)
208
-
209
- left2, right2 = st.columns(2)
210
- with left2:
211
- fig = chart_outcome_breakdown(filtered)
212
- if fig:
213
- st.plotly_chart(fig, use_container_width=True)
214
- with right2:
215
- fig = chart_efficiency_scatter(filtered)
216
- if fig:
217
- st.plotly_chart(fig, use_container_width=True)
218
-
219
- left3, right3 = st.columns(2)
220
- with left3:
221
- fig = chart_queue_efficiency(filtered)
222
- if fig:
223
- st.plotly_chart(fig, use_container_width=True)
224
- with right3:
225
- fig = chart_duration_distribution(filtered)
226
- if fig:
227
- st.plotly_chart(fig, use_container_width=True)
228
-
229
- # Resource sizing (full width)
230
- fig = chart_resource_sizing(filtered)
231
- if fig:
232
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
233
 
234
- # === User Detail tab ===
235
  with tab_user:
236
- users_in_data = sorted(filtered["username"].dropna().unique().tolist())
237
  if not users_in_data:
238
  st.info("No users in filtered data.")
239
  else:
240
- selected_user = st.selectbox("Select User", users_in_data)
241
- user_df = filtered[filtered["username"] == selected_user]
242
-
243
- # User metric cards
244
- u_spend = user_df["dollars_used"].sum()
245
- u_jobs = len(user_df)
246
- u_batch = user_df[~user_df["launch_method"].isin(INTERACTIVE_METHODS)]
247
- u_batch_total = len(u_batch)
248
- u_batch_completed = (u_batch["last_state"] == "COMPLETED").sum()
249
- u_batch_pct = u_batch_completed / u_batch_total * 100 if u_batch_total > 0 else 0
250
-
251
- um1, um2, um3 = st.columns(3)
252
- um1.metric("User Spend", f"${u_spend:,.2f}")
253
- um2.metric("User Jobs", f"{u_jobs:,}")
254
- um3.metric("Batch Completion %", f"{u_batch_pct:.1f}%")
255
-
256
- # User charts
257
  left, right = st.columns(2)
258
  with left:
259
- fig = chart_outcome_breakdown(user_df)
260
- if fig:
261
- st.plotly_chart(fig, use_container_width=True)
262
- with right:
263
- fig = chart_efficiency_scatter(user_df)
264
- if fig:
265
- st.plotly_chart(fig, use_container_width=True)
266
-
267
- left2, right2 = st.columns(2)
268
- with left2:
269
- fig = chart_launch_method_dollars(user_df)
270
- if fig:
271
- st.plotly_chart(fig, use_container_width=True)
272
- with right2:
273
- fig = chart_resource_sizing(user_df)
274
- if fig:
275
- st.plotly_chart(fig, use_container_width=True)
276
-
277
- # Daily usage (full width)
278
- fig = chart_daily_usage(user_df)
279
- if fig:
280
- st.plotly_chart(fig, use_container_width=True)
281
-
282
- # Duration distribution (full width)
283
- fig = chart_duration_distribution(user_df)
284
- if fig:
285
- st.plotly_chart(fig, use_container_width=True)
286
-
287
- # User job table
288
- st.subheader(f"{selected_user}'s Jobs ({u_jobs:,} rows)")
289
- display_cols = [
290
- c
291
- for c in [
292
- "job_id",
293
- "project_code",
294
- "system_code",
295
- "queue_name",
296
- "launch_method",
297
- "last_state",
298
- "outcome_category",
299
- "walltime_hours",
300
- "dollars_used",
301
- "end_time",
302
- ]
303
- if c in user_df.columns
304
- ]
305
- user_display = user_df[display_cols].sort_values("end_time", ascending=False)
306
- col_config = {}
307
- if "dollars_used" in user_display.columns:
308
- col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f")
309
- if "walltime_hours" in user_display.columns:
310
- col_config["walltime_hours"] = st.column_config.NumberColumn(
311
- "Walltime (hrs)", format="%.1f"
312
  )
313
- st.dataframe(
314
- user_display, use_container_width=True, column_config=col_config, hide_index=True
 
 
 
 
 
 
 
 
315
  )
316
 
317
- # === Raw Data tab ===
318
  with tab_data:
319
  st.subheader(f"Filtered Jobs ({len(filtered):,} rows)")
320
-
321
- # Format for display
322
- display_cols = [
323
- c
324
- for c in [
325
- "job_id",
326
- "username",
327
- "project_code",
328
- "system_code",
329
- "queue_name",
330
- "launch_method",
331
- "last_state",
332
- "outcome_category",
333
- "walltime_hours",
334
- "dollars_used",
335
- "submit_time",
336
- "start_time",
337
- "end_time",
338
- ]
339
- if c in filtered.columns
340
- ]
341
- display_df = filtered[display_cols].copy()
342
-
343
- # Format columns for readability
344
- col_config = {}
345
- if "dollars_used" in display_df.columns:
346
- col_config["dollars_used"] = st.column_config.NumberColumn("Cost ($)", format="$%.2f")
347
- if "walltime_hours" in display_df.columns:
348
- col_config["walltime_hours"] = st.column_config.NumberColumn(
349
- "Walltime (hrs)", format="%.1f"
350
- )
351
-
352
- st.dataframe(display_df, use_container_width=True, column_config=col_config, hide_index=True)
353
-
354
- # Download button
355
- csv = filtered[display_cols].to_csv(index=False)
356
  st.download_button(
357
- label="Download filtered data as CSV",
358
- data=csv,
359
- file_name="osc_usage_filtered.csv",
360
- mime="text/csv",
361
  )
362
 
363
  st.caption(f"Data from OSCusage CLI | {metadata.get('job_count', '?')} jobs in dataset")
 
1
+ """OSC Usage Dashboard."""
2
 
3
  from __future__ import annotations
4
 
5
+ import json
6
  from datetime import date, datetime, timedelta
7
+ from pathlib import Path
8
 
9
+ import pandas as pd
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
  import streamlit as st
13
+ from huggingface_hub import snapshot_download
14
 
15
+ DATASET_REPO = "buckeyeguy/osc-usage-data"
16
+ PROJECT_CODES = ["PAS1266", "PAS3209"]
17
+ ALLOCATIONS = {"PAS1266": 2257.0, "PAS3209": 1211.0}
18
+ INTERACTIVE_METHODS = frozenset({"Jupyter", "Desktop", "Code Server", "MATLAB", "MLflow"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  st.set_page_config(page_title="OSC Usage Dashboard", layout="wide")
21
 
22
+
23
+ @st.cache_data(ttl=300)
24
+ def load_data():
25
+ snap = Path(snapshot_download(repo_id=DATASET_REPO, repo_type="dataset"))
26
+ jobs = pd.read_parquet(snap / "jobs.parquet")
27
+ snapshots = pd.read_parquet(snap / "snapshots.parquet")
28
+ metadata = json.loads((snap / "metadata.json").read_text())
29
+ for col in ["submit_time", "start_time", "end_time"]:
30
+ if col in jobs.columns:
31
+ jobs[col] = pd.to_datetime(jobs[col])
32
+ return jobs, snapshots, metadata
33
+
34
+
35
+ def chart(df, kind="histogram", **kwargs):
36
+ if df.empty:
37
+ return
38
+ st.plotly_chart(getattr(px, kind)(df, **kwargs), use_container_width=True)
39
+
40
+
41
+ def agg(df, by, value="dollars_used"):
42
+ """Sum-by-group stat. Pre-aggregates so we can use px.bar (geom) instead of px.histogram (stat)."""
43
+ return df.groupby(by, dropna=False)[value].sum().reset_index()
44
+
45
+
46
+ def daily(df, by):
47
+ return (
48
+ df.dropna(subset=["end_date"]).groupby(["end_date", by])["dollars_used"].sum().reset_index()
49
+ )
50
+
51
+
52
+ def runtime_vs_cost(df):
53
+ chart(
54
+ df[(df["walltime_hours"] > 0) & (df["dollars_used"] > 0)],
55
+ "scatter",
56
+ x="walltime_hours",
57
+ y="dollars_used",
58
+ color="outcome_category",
59
+ log_x=True,
60
+ log_y=True,
61
+ opacity=0.5,
62
+ title="Runtime vs Cost",
63
+ )
64
+
65
+
66
+ # --- Load + Sidebar ---
67
+
68
  jobs, snapshots, metadata = load_data()
69
 
 
70
  with st.sidebar:
71
  st.title("OSC Usage Dashboard")
 
 
72
  pushed_at = metadata.get("pushed_at", "")
73
  if pushed_at:
74
+ age = datetime.now() - datetime.fromisoformat(pushed_at)
 
75
  if age > timedelta(hours=24):
76
+ st.warning(f"Data is {age.days}d old")
77
  else:
78
  st.success(f"Updated {pushed_at[:16]}")
 
 
 
79
  if st.button("Reload Data"):
80
  st.cache_data.clear()
81
  st.rerun()
 
82
  st.divider()
83
 
 
84
  all_dates = jobs["end_date"].dropna()
85
+ min_date, max_date = (
86
+ (all_dates.min(), all_dates.max())
87
+ if not all_dates.empty
88
+ else (date(2024, 7, 1), date.today())
89
+ )
 
 
90
  date_range = st.date_input(
91
+ "Date Range", value=(min_date, max_date), min_value=min_date, max_value=max_date
 
 
 
92
  )
93
+ date_filter = (
94
+ date_range
95
+ if isinstance(date_range, tuple) and len(date_range) == 2
96
+ else (min_date, max_date)
97
+ )
98
+
99
+ selections = {}
100
+ for label, col in [
101
+ ("Projects", "project_code"),
102
+ ("Users", "username"),
103
+ ("Systems", "system_code"),
104
+ ]:
105
+ opts = sorted(jobs[col].dropna().unique())
106
+ selections[col] = st.multiselect(label, opts, default=opts)
107
+
108
+ filtered = jobs[
109
+ jobs["end_date"].between(date_filter[0], date_filter[1])
110
+ & jobs["project_code"].isin(selections["project_code"])
111
+ & jobs["username"].isin(selections["username"])
112
+ & jobs["system_code"].isin(selections["system_code"])
113
+ ]
114
+
115
+ # --- Metric Cards ---
 
 
116
 
 
117
  total_spend = filtered["dollars_used"].sum()
118
+ batch = filtered[~filtered["launch_method"].isin(INTERACTIVE_METHODS)]
119
+ interactive_dollars = filtered.loc[
120
+ filtered["launch_method"].isin(INTERACTIVE_METHODS), "dollars_used"
 
 
 
 
 
 
 
 
 
121
  ].sum()
 
122
 
123
  m1, m2, m3, m4 = st.columns(4)
124
+ m1.metric("Total Spend", f"${total_spend:,.2f}")
125
+ m2.metric("Total Jobs", f"{len(filtered):,}")
126
+ m3.metric("Batch Completion %", f"{(batch['last_state'] == 'COMPLETED').mean() * 100:.1f}%")
 
 
 
 
127
  m4.metric(
128
+ "Interactive Spend %", f"{interactive_dollars / total_spend * 100 if total_spend else 0:.1f}%"
 
 
129
  )
 
130
  st.divider()
131
 
132
+ # --- Tabs ---
133
+
134
  tab_overview, tab_spend, tab_health, tab_user, tab_data = st.tabs(
135
  ["Overview", "Spend Analysis", "Job Health", "User Detail", "Raw Data"]
136
  )
137
 
 
138
  with tab_overview:
139
+ for col, proj in zip(st.columns(len(PROJECT_CODES)), PROJECT_CODES):
 
 
140
  with col:
141
+ ps = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum()
142
  alloc = ALLOCATIONS.get(proj, 0)
143
  if alloc > 0:
144
+ mx = max(alloc * 1.2, ps)
145
+ fig = go.Figure(
146
+ go.Indicator(
147
+ mode="gauge+number",
148
+ value=ps,
149
+ number={"prefix": "$", "valueformat": ",.0f"},
150
+ title={"text": proj},
151
+ gauge={
152
+ "axis": {"range": [0, mx], "tickprefix": "$"},
153
+ "bar": {"color": px.colors.qualitative.Plotly[0]},
154
+ "steps": [
155
+ {"range": [0, alloc * 0.7], "color": "#D4EDDA"},
156
+ {"range": [alloc * 0.7, alloc * 0.9], "color": "#FFF3CD"},
157
+ {"range": [alloc * 0.9, mx], "color": "#F8D7DA"},
158
+ ],
159
+ "threshold": {
160
+ "line": {"color": "red", "width": 3},
161
+ "thickness": 0.75,
162
+ "value": alloc,
163
+ },
164
+ },
165
+ )
166
+ )
167
+ fig.update_layout(height=250, margin=dict(t=60, b=20, l=30, r=30))
168
  st.plotly_chart(fig, use_container_width=True)
 
 
169
 
170
+ latest = snapshots.sort_values("snapshot_date").groupby("project_code").last()
171
+ for col, proj in zip(st.columns(len(PROJECT_CODES)), PROJECT_CODES):
 
 
172
  with col:
173
+ if proj in latest.index:
174
+ bal = latest.loc[proj].get("current_balance")
175
+ ps = filtered.loc[filtered["project_code"] == proj, "dollars_used"].sum()
 
 
176
  st.metric(
177
  f"{proj} Balance",
178
+ f"${bal:,.2f}" if bal is not None else "N/A",
179
+ delta=f"-${ps:,.2f} spent",
180
  delta_color="inverse",
181
  )
182
 
183
+ burn = daily(filtered, "project_code")
184
+ burn["cumulative"] = burn.groupby("project_code")["dollars_used"].cumsum()
185
+ if not burn.empty:
186
+ fig = px.area(
187
+ burn, x="end_date", y="cumulative", color="project_code", title="Budget Burn Rate"
188
+ )
189
+ fig.update_yaxes(tickprefix="$")
190
+ for proj, alloc in ALLOCATIONS.items():
191
+ fig.add_hline(y=alloc, line_dash="dot", annotation_text=f"{proj}: ${alloc:,.0f}")
192
  st.plotly_chart(fig, use_container_width=True)
193
 
 
194
  with tab_spend:
195
  left, right = st.columns(2)
196
  with left:
197
+ chart(
198
+ agg(filtered, ["system_code", "subtype_code"]),
199
+ "bar",
200
+ x="system_code",
201
+ y="dollars_used",
202
+ color="subtype_code",
203
+ barmode="group",
204
+ title="Usage by System",
205
+ )
206
  with right:
207
+ chart(filtered, "pie", names="username", values="dollars_used", title="Dollars by User")
208
+ left, right = st.columns(2)
209
+ with left:
210
+ chart(
211
+ agg(filtered, ["project_code", "launch_method"]),
212
+ "bar",
213
+ x="project_code",
214
+ y="dollars_used",
215
+ color="launch_method",
216
+ title="Spend by Launch Method",
217
+ )
218
+ with right:
219
+ chart(
220
+ filtered,
221
+ "histogram",
222
+ x="project_code",
223
+ color="launch_method",
224
+ barmode="stack",
225
+ title="Jobs by Launch Method",
226
+ )
227
+ chart(
228
+ daily(filtered, "system_code"),
229
+ "line",
230
+ x="end_date",
231
+ y="dollars_used",
232
+ color="system_code",
233
+ markers=True,
234
+ title="Daily Usage",
235
+ )
236
 
 
237
  with tab_health:
238
  left, right = st.columns(2)
239
  with left:
240
+ chart(
241
+ filtered.dropna(subset=["end_month"]),
242
+ "histogram",
243
+ x="end_month",
244
+ color="last_state",
245
+ barmode="stack",
246
+ title="Job Outcomes by Month",
247
+ )
248
  with right:
249
+ chart(
250
+ agg(filtered, ["outcome_category", "project_code"]),
251
+ "bar",
252
+ x="outcome_category",
253
+ y="dollars_used",
254
+ color="project_code",
255
+ barmode="group",
256
+ title="Spend by Outcome",
257
+ )
258
+ queue_cost = (
259
+ filtered.groupby("queue_name")
260
+ .agg(n=("job_id", "count"), avg_cost=("dollars_used", "mean"))
261
+ .reset_index()
262
+ .query("n >= 10")
263
+ .sort_values("avg_cost", ascending=False)
264
+ )
265
+ if not queue_cost.empty:
266
+ chart(
267
+ queue_cost,
268
+ "bar",
269
+ x="queue_name",
270
+ y="avg_cost",
271
+ text="n",
272
+ title="Avg Cost per Job by Queue (n≥10)",
273
+ )
274
+ left, right = st.columns(2)
275
+ with left:
276
+ chart(
277
+ filtered[filtered["walltime_hours"] > 0],
278
+ "histogram",
279
+ x="walltime_hours",
280
+ color="outcome_category",
281
+ nbins=30,
282
+ title="Duration Distribution",
283
+ )
284
+ with right:
285
+ runtime_vs_cost(filtered)
286
 
 
287
  with tab_user:
288
+ users_in_data = sorted(filtered["username"].dropna().unique())
289
  if not users_in_data:
290
  st.info("No users in filtered data.")
291
  else:
292
+ user = st.selectbox("Select User", users_in_data)
293
+ udf = filtered[filtered["username"] == user]
294
+ ub = udf[~udf["launch_method"].isin(INTERACTIVE_METHODS)]
295
+ m1, m2, m3 = st.columns(3)
296
+ m1.metric("Spend", f"${udf['dollars_used'].sum():,.2f}")
297
+ m2.metric("Jobs", f"{len(udf):,}")
298
+ m3.metric(
299
+ "Batch Completion %",
300
+ f"{(ub['last_state'] == 'COMPLETED').mean() * 100:.1f}%" if len(ub) else "N/A",
301
+ )
 
 
 
 
 
 
 
302
  left, right = st.columns(2)
303
  with left:
304
+ chart(
305
+ agg(udf, ["launch_method", "outcome_category"]),
306
+ "bar",
307
+ x="launch_method",
308
+ y="dollars_used",
309
+ color="outcome_category",
310
+ title="Spend by Method & Outcome",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  )
312
+ with right:
313
+ runtime_vs_cost(udf)
314
+ chart(
315
+ daily(udf, "system_code"),
316
+ "line",
317
+ x="end_date",
318
+ y="dollars_used",
319
+ color="system_code",
320
+ markers=True,
321
+ title="Daily Usage",
322
  )
323
 
 
324
  with tab_data:
325
  st.subheader(f"Filtered Jobs ({len(filtered):,} rows)")
326
+ st.dataframe(filtered, use_container_width=True, hide_index=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  st.download_button(
328
+ "Download CSV", filtered.to_csv(index=False), "osc_usage_filtered.csv", "text/csv"
 
 
 
329
  )
330
 
331
  st.caption(f"Data from OSCusage CLI | {metadata.get('job_count', '?')} jobs in dataset")