PLUS_Lab_GPUs / app.py
mohsenfayyaz's picture
Update app.py
0acf0dc verified
raw
history blame
12 kB
import os
import gradio as gr
import numpy as np
import pandas as pd
import datetime
import plotly.express as px
import plotly.graph_objects as go
import datasets
##### GPU PLOT #####
def split_multi_users(dfs):
    """Expand rows shared by several users into one row per user.

    The ``username`` column may hold several names joined by ``", "``. Each
    such row is repeated once per name, with ``username`` set to that single
    name and ``count`` set to ``1 / number_of_names`` so the shares of one
    original row still add up to 1.

    Args:
        dfs: DataFrame with a string ``username`` column. It is not modified.

    Returns:
        A new DataFrame with a fresh ``RangeIndex`` holding the input columns
        plus ``usernames`` (the list of names sharing the row) and ``count``.
        An empty input yields an empty frame that still has those columns.
    """
    df = dfs.copy()
    df["usernames"] = df["username"].apply(lambda x: x.split(", "))
    df["count"] = 1
    rows = []
    for row in df.to_dict(orient="records"):
        share = 1 / len(row["usernames"])
        rows.extend(
            {**row, "count": share, "username": name}
            for name in row["usernames"]
        )
    # Passing the columns explicitly keeps the schema when there are no rows;
    # a bare pd.DataFrame([]) would come back without any columns at all.
    return pd.DataFrame(rows, columns=df.columns)
def plot_gpus():
    """Build the stacked per-server GPU bar chart from the latest ``gpus.csv``.

    The table is re-downloaded on every call, missing values are replaced by
    the string ``"FREE"``, and rows shared by several users are split with
    ``split_multi_users`` so each user gets a proportional bar segment.

    Returns:
        tuple: ``(fig, dfs)`` -- the Plotly figure and the loaded table (after
        the index column drop and the ``"FREE"`` fill, before the user split).
    """
    ### Load Data
    dfs = datasets.load_dataset(
        "pluslab/PLUS_Lab_GPUs_Data",
        data_files="gpus.csv",
        download_mode="force_redownload",
    )["train"].to_pandas()
    # "Unnamed: 0" is the index column left over from a CSV round-trip; it is
    # not guaranteed to exist, so do not fail when it is absent.
    dfs = dfs.drop(columns=["Unnamed: 0"], errors="ignore")
    dfs = dfs.fillna("FREE")
    dfs_plot = split_multi_users(dfs)

    fig = px.bar(
        dfs_plot,
        x="count",
        y="server",
        color="username",
        # min() picks the earliest timestamp present in the table.
        title=f"Last Updated {min(dfs['timestamp'])}",
        color_discrete_map={
            "FREE": "black",
        },
        text=dfs_plot["username"].astype(str) + "<br>" + dfs_plot["device"].astype(str),
    )
    fig.update_layout(
        # Reverse the order of first appearance for the y-axis categories.
        yaxis={
            "categoryorder": "array",
            "categoryarray": dfs_plot["server"].unique()[::-1],
        },
        barcornerradius=5,
    )
    fig.update_traces(textposition="inside", insidetextanchor="middle")
    return fig, dfs
##### DISK PLOT #####
def _pick_col(df, candidates):
    """Return the real column name matching the first usable candidate.

    Matching ignores case and surrounding whitespace on both sides. The
    candidates are tried in the order given; ``None`` is returned when none of
    them names a column of ``df``.
    """
    lookup = {}
    for column in df.columns:
        # Later columns overwrite earlier ones that normalise to the same key.
        lookup[column.strip().lower()] = column
    wanted = (name.strip().lower() for name in candidates)
    return next((lookup[key] for key in wanted if key in lookup), None)
def _kblocks_to_tib(kblocks):  # shown as "TB" per your convention
    """Scale a count of 1K blocks down by 1024**3 (displayed as "TB")."""
    blocks_per_unit = 1 << 30  # same integer as 1024 ** 3
    return kblocks / blocks_per_unit
def _kblocks_to_gib(kblocks):  # shown as "GB"
    """Scale a count of 1K blocks down by 1024**2 (displayed as "GB")."""
    blocks_per_unit = 1 << 20  # same integer as 1024 ** 2
    return kblocks / blocks_per_unit
def plot_disks(alert_threshold_pct=99.0):
    """Build the per-filesystem disk-usage chart from the latest ``disks.csv``.

    Each row of the table becomes one horizontal bar on a 0-100 % axis: a pale
    full-width background, the used share drawn over it (recoloured when the
    row is in alert) and the available share starting where the used share
    ends. The total size is printed to the right of every bar.

    Args:
        alert_threshold_pct: A row is flagged (``ALERT`` column, warning sign
            in its label, highlight colour) when its used percentage is
            strictly greater than this value.

    Returns:
        tuple: ``(fig, df)`` -- the Plotly figure and the table it was built
        from, including the derived ``Label``, ``Total_kb``, ``Used_pct``,
        ``Avail_pct``, ``Used_TB``, ``Total_TB``, ``Avail_GB`` and ``ALERT``
        columns.

    Raises:
        ValueError: If the server, filesystem, block-count, used or available
            column cannot be located.
    """
    df = datasets.load_dataset(
        "pluslab/PLUS_Lab_GPUs_Data",
        data_files="disks.csv",
        download_mode="force_redownload",
    )["train"].to_pandas()
    if "Unnamed: 0" in df.columns:
        df = df.drop(columns=["Unnamed: 0"])

    # Resolve the column names once, tolerating header spelling variants.
    server_col = _pick_col(df, ["server"])
    fs_col = _pick_col(df, ["filesystem"])
    blocks_col = _pick_col(df, ["1k-blocks", "1k blocks", "blocks"])
    used_col = _pick_col(df, ["used"])
    avail_col = _pick_col(df, ["available", "avail"])
    mount_col = _pick_col(df, ["mounted", "mounted on", "mount", "mountpoint"])

    required = [server_col, fs_col, blocks_col, used_col, avail_col]
    if any(c is None for c in required):
        raise ValueError(f"Missing required columns. Found: {list(df.columns)}")

    if mount_col is not None:
        # Keep one of /data2. This goes through the resolved mount column so a
        # header other than exactly "Mounted" does not raise KeyError.
        df = pd.concat(
            [
                df[df[mount_col] != "/data2"],
                df[df[mount_col] == "/data2"].drop_duplicates(subset=[mount_col]),
            ]
        )

    for c in [blocks_col, used_col, avail_col]:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Label: prefer the mount point, fall back to the filesystem name.
    label_source = mount_col if mount_col is not None else fs_col
    df["Label"] = df[server_col].astype(str) + " • " + df[label_source].astype(str)

    # Totals & pct (total is used + available, not the block-count column).
    df["Total_kb"] = df[used_col] + df[avail_col]
    df["Used_pct"] = (df[used_col] / df["Total_kb"]) * 100.0
    df["Used_pct"] = df["Used_pct"].clip(0, 100)
    df["Avail_pct"] = (100.0 - df["Used_pct"]).clip(0, 100)

    # Sizes
    df["Used_TB"] = _kblocks_to_tib(df[used_col])
    df["Total_TB"] = _kblocks_to_tib(df["Total_kb"])
    df["Avail_GB"] = _kblocks_to_gib(df[avail_col])  # GB for hovers

    # Alerts
    df["ALERT"] = df["Used_pct"] > alert_threshold_pct

    # Sort by mount point when there is one; otherwise by the label.
    sort_col = mount_col if mount_col is not None else "Label"
    df = df.sort_values(sort_col, ascending=False).reset_index(drop=True)

    is_alert = df["ALERT"].to_numpy()
    y_labels = np.where(is_alert, "⚠ " + df["Label"], df["Label"])
    used_text = [f"{u:.2f} TB ({p:.0f}%)" for u, p in zip(df["Used_TB"], df["Used_pct"])]
    total_annot = [f"{t:.2f} TB" for t in df["Total_TB"]]
    avail_gb_0 = [f"{g:.0f} GB" for g in df["Avail_GB"]]

    # Hover strings shared by the "used" and "available" traces. They stay
    # object arrays so np.stack can combine them with the float percentages.
    used_tb_txt = df["Used_TB"].map(lambda v: f"{v:.2f} TB").to_numpy()
    avail_gb_txt = df["Avail_GB"].map(lambda v: f"{v:.0f} GB").to_numpy()
    total_tb_txt = df["Total_TB"].map(lambda v: f"{v:.2f} TB").to_numpy()

    # Colors
    COLOR_TOTAL = "#CBD5E1"
    COLOR_USED = "#2563EB"
    COLOR_FREE = "#94A3B8"
    COLOR_ALERT = "#F59E0B"
    used_colors = np.where(is_alert, COLOR_ALERT, COLOR_USED)

    fig = go.Figure()

    # Gray background hover: Available in GB (0dp)
    fig.add_trace(
        go.Bar(
            y=y_labels,
            x=[100] * len(df),
            base=0,
            orientation="h",
            marker=dict(color=COLOR_TOTAL),
            opacity=0.40,
            hovertemplate="<b>%{y}</b><br>Available: %{customdata}<br><extra></extra>",
            customdata=avail_gb_0,
            showlegend=False,
        )
    )

    # Used hover: Available in GB (0dp) too
    fig.add_trace(
        go.Bar(
            y=y_labels,
            x=df["Used_pct"],
            base=0,
            name=f"Used (>{alert_threshold_pct:.0f}% highlighted)",
            orientation="h",
            marker=dict(color=used_colors),
            text=used_text,
            textposition="inside",
            insidetextanchor="middle",
            hovertemplate=(
                "<b>%{y}</b><br>"
                "Used: %{customdata[0]} (%{customdata[3]:.2f}%)<br>"
                "Available: %{customdata[1]}<br>"
                "Total: %{customdata[2]}<br>"
                "<extra></extra>"
            ),
            customdata=np.stack(
                [
                    used_tb_txt,
                    avail_gb_txt,
                    total_tb_txt,
                    df["Used_pct"].to_numpy(),
                ],
                axis=1,
            ),
        )
    )

    # Available hover: Available in GB (0dp)
    fig.add_trace(
        go.Bar(
            y=y_labels,
            x=df["Avail_pct"],
            base=df["Used_pct"],  # starts where the used segment ends
            name="Available",
            orientation="h",
            marker=dict(color=COLOR_FREE),
            hovertemplate=(
                "<b>%{y}</b><br>"
                "Available: %{customdata[0]}<br>"
                "Used: %{customdata[1]}<br>"
                "Total: %{customdata[2]}<br>"
                "<extra></extra>"
            ),
            customdata=np.stack(
                [
                    avail_gb_txt,
                    used_tb_txt,
                    total_tb_txt,
                ],
                axis=1,
            ),
        )
    )

    # Total annotation (TB, 2dp), placed just right of the 100 % mark.
    for y, ttxt, row_alert in zip(y_labels, total_annot, is_alert):
        fig.add_annotation(
            x=100,
            y=y,
            text=ttxt,
            showarrow=False,
            xanchor="left",
            yanchor="middle",
            xshift=6,
            font=dict(color=("#B45309" if row_alert else "#334155")),
        )

    fig.update_layout(
        barmode="overlay",
        template="plotly_white",
        title=f"Disk usage (alerts: Used > {alert_threshold_pct:.0f}%)",
        xaxis=dict(range=[0, 100], ticksuffix="%", title="Percent of total"),
        yaxis_title="",
        height=max(420, 28 * len(df)),
        margin=dict(l=280, r=120, t=60, b=40),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        barcornerradius=4,
    )
    fig.update_yaxes(autorange="reversed")
    return fig, df
def plot_users_disks(topk_to_show=5):
    """Build the per-user disk-usage heatmap from the latest ``users_disks.csv``.

    Rows of the heatmap are ``server/path`` locations, columns are usernames
    and cells are the summed ``size_GB``. Only users that rank in the top
    ``topk_to_show`` (by non-zero usage) of at least one location are kept,
    and a ``Total`` row sums the kept columns.

    Args:
        topk_to_show: Number of largest users taken from every location
            before the per-location selections are merged.

    Returns:
        tuple: ``(fig, df_return)`` -- the Plotly heatmap and the loaded table
        with the added ``server_path`` column (before the ``path == "home"``
        rows are filtered out and before ``size_GB`` is coerced).
    """
    df = datasets.load_dataset(
        "pluslab/PLUS_Lab_GPUs_Data",
        data_files="users_disks.csv",
        download_mode="force_redownload",
    )["train"].to_pandas()
    df["server_path"] = df["server"] + "/" + df["path"]
    df["server_path"] = df["server_path"].apply(
        lambda x: x.replace(".cs.ucla.edu", "")
    )
    df_return = df.copy()

    # .copy() makes the filtered frame independent, so the assignment below
    # does not write into a slice of the original frame.
    df = df[df["path"] != "home"].copy()
    # Ensure numeric
    df["size_GB"] = pd.to_numeric(df["size_GB"], errors="coerce").fillna(0)

    # Preserve original appearance order
    original_order = df["server_path"].drop_duplicates()
    heatmap_df = df.pivot_table(
        index="server_path",
        columns="username",
        values="size_GB",
        aggfunc="sum",
        fill_value=0,
    )
    # Reindex to preserve original order first
    heatmap_df = heatmap_df.reindex(original_order)

    # ----------- Custom ordering logic -----------
    # Order: "/home" paths, then "/local1" paths, then everything else.
    home_rows = [p for p in heatmap_df.index if "/home" in p]
    local1_rows = [p for p in heatmap_df.index if "/local1" in p]
    grouped = set(home_rows) | set(local1_rows)
    other_rows = [p for p in heatmap_df.index if p not in grouped]
    # A path containing both markers is in both lists; dict.fromkeys keeps the
    # first occurrence only, so reindex() cannot duplicate that row.
    new_order = list(dict.fromkeys(home_rows + local1_rows + other_rows))
    heatmap_df = heatmap_df.reindex(new_order)
    # ---------------------------------------------

    # --- Top-K users per server_path (row), then union across rows ---
    # The explicit loop also works when the pivot has no rows at all.
    selected = set()
    for _, usage in heatmap_df.iterrows():
        # Ignore zeros so unused locations do not pull in arbitrary users.
        selected.update(usage[usage > 0].nlargest(topk_to_show).index)
    selected_users = pd.Index(sorted(selected))
    # Keep only those users (columns) that exist; copy so the Total row below
    # is added to an independent frame.
    heatmap_df = heatmap_df.loc[
        :, heatmap_df.columns.intersection(selected_users)
    ].copy()
    # ---------------------------------------------------------------

    # Add Total row
    heatmap_df.loc["Total"] = heatmap_df.sum(axis=0)
    # Sort users by total usage (descending)
    heatmap_df = heatmap_df[
        heatmap_df.loc["Total"].sort_values(ascending=False).index
    ]

    # -------- Reverse row order, with Total first in the list --------
    # NOTE(review): with Plotly's default y-axis direction the first entry is
    # presumably drawn at the bottom, i.e. Total ends up last visually.
    non_total_rows = [r for r in heatmap_df.index if r != "Total"]
    reversed_rows = ["Total"] + list(reversed(non_total_rows))
    heatmap_df = heatmap_df.reindex(reversed_rows)
    # ------------------------------------------------------------------

    # Text labels inside cells
    text_values = heatmap_df.round(1).astype(str)
    fig = go.Figure(
        data=go.Heatmap(
            z=heatmap_df.values,
            x=heatmap_df.columns.tolist(),
            y=heatmap_df.index.tolist(),
            text=text_values.values,
            texttemplate="%{text}",
            textfont={"size": 12},
            hovertemplate="path=%{y}<br>user=%{x}<br>size_GB=%{z:.2f}<extra></extra>",
            colorscale="YlOrRd",
        )
    )
    fig.update_layout(
        title="Users Disk Usage Heatmap (GB)",
        xaxis_title="username",
        yaxis_title="server_path",
    )
    return fig, df_return
##### PLOT ALL #####
def plot_figs():
    """Build every dashboard figure and table, isolating failures per plot.

    Each plot function is run in its own ``try`` so that a failure in one of
    them (for example a download error) leaves only its own figure and table
    as ``None`` instead of blanking everything that comes after it.

    Returns:
        tuple: ``(fig_gpus, fig_disks, fig_users_disks, df_gpus, df_disks,
        df_users_disks)`` -- the three figures followed by the three tables;
        the entries of a plot that failed are ``None``.
    """
    plotters = (
        ("GPU", plot_gpus),
        ("disk", plot_disks),
        ("users disk", plot_users_disks),
    )
    figs, frames = [], []
    for label, plotter in plotters:
        fig, frame = None, None
        try:
            fig, frame = plotter()
        except Exception as e:
            # Best effort: report which plot failed and keep going.
            print(f"Failed to build {label} plot: {e!r}")
        figs.append(fig)
        frames.append(frame)
    return (*figs, *frames)
# Gradio app: no inputs; plot_figs fills three plots followed by three tables,
# in the same order as the tuple it returns.
demo = gr.Interface(
    fn=plot_figs,
    inputs=[],
    outputs=[
        *(
            gr.Plot(label=title, elem_classes="plotcss")
            for title in ("GPU Status", "Disk Status", "Users Disk Status")
        ),
        *(
            gr.Dataframe(label=title)
            for title in (
                "GPU Status Details",
                "Disk Status Details",
                "Users Disk Status Details",
            )
        ),
    ],
    live=True,
    flagging_options=[],
    # Cap the width of the three plot components.
    css=".plotcss {max-width: 820px !important;}",
    delete_cache=(1, 1),
)

if __name__ == "__main__":
    demo.launch(debug=False)