Spaces:

pluslab
/

PLUS_Lab_GPUs

Running

App Files Files Community

PLUS_Lab_GPUs / app.py

mohsenfayyaz

Update app.py

0acf0dc verified 6 days ago

raw

history blame

12 kB

	import os
	import gradio as gr
	import numpy as np
	import pandas as pd
	import datetime
	import plotly.express as px
	import plotly.graph_objects as go
	import datasets


	##### GPU PLOT #####
	def split_multi_users(dfs):
	    """Split GPUs shared by several users into one row per user.

	    Each value of the ``username`` column is split on ``", "``. Every user
	    found gets their own copy of the row, with ``count`` set to an equal
	    share of that GPU (``1 / number_of_users``), so the counts belonging to
	    one input row always sum to 1.

	    Args:
	        dfs: DataFrame with a string ``username`` column. It is not modified.

	    Returns:
	        A new DataFrame with a fresh 0..n-1 index holding the input columns
	        plus ``usernames`` (the full user list of the originating row) and
	        ``count``. An empty input yields an empty frame that still carries
	        all of these columns, so callers can keep addressing them by name.
	    """
	    df = dfs.copy()
	    df["usernames"] = df["username"].apply(lambda names: names.split(", "))
	    # Equal share of the GPU for each of its users.
	    df["count"] = 1.0 / df["usernames"].map(len).astype(float)
	    # Explode a second column holding the same lists, so "usernames" keeps
	    # the complete list on every resulting row.
	    df["username"] = df["usernames"]
	    return df.explode("username", ignore_index=True)

	def plot_gpus():
	    """Build the per-server GPU occupancy bar chart.

	    Downloads ``gpus.csv`` from the ``pluslab/PLUS_Lab_GPUs_Data`` dataset
	    (forcing a re-download on every call), replaces every missing value
	    with ``"FREE"`` and draws one bar segment per (GPU, user) pair, grouped
	    by server and coloured by user.

	    Returns:
	        ``(fig, dfs)``: the Plotly figure and the loaded table (after the
	        index-column drop and the ``"FREE"`` fill).
	    """
	    dfs = datasets.load_dataset(
	        "pluslab/PLUS_Lab_GPUs_Data",
	        data_files="gpus.csv",
	        download_mode="force_redownload",
	    )["train"].to_pandas()
	    # The "Unnamed: 0" column only exists when the CSV was written together
	    # with its index; do not fail when it is absent.
	    dfs = dfs.drop(columns=["Unnamed: 0"], errors="ignore")
	    # NOTE(review): this fills NaN in *every* column; presumably only
	    # "username" is ever empty (an unused GPU) -- confirm against the data.
	    dfs = dfs.fillna("FREE")
	    dfs_plot = split_multi_users(dfs)

	    fig = px.bar(
	        dfs_plot,
	        x="count",
	        y="server",
	        color="username",
	        # min() picks the smallest timestamp value found in the table.
	        title=f"Last Updated {min(dfs['timestamp'])}",
	        color_discrete_map={
	            "FREE": "black",
	        },
	        text=dfs_plot["username"].astype(str) + "<br>" + dfs_plot["device"].astype(str),
	    )
	    fig.update_layout(
	        # Category array is the servers in reverse order of first appearance.
	        yaxis={"categoryorder": "array", "categoryarray": dfs_plot["server"].unique()[::-1]},
	        barcornerradius=5,
	    )
	    fig.update_traces(textposition="inside", insidetextanchor="middle")
	    return fig, dfs

	##### DISK PLOT #####
	def _pick_col(df, candidates):
	    """Return the real name of the first candidate column present in *df*.

	    Matching ignores case and surrounding whitespace on both sides. The
	    candidates are tried in the order given; ``None`` is returned when no
	    candidate matches any column.
	    """
	    # Normalised name -> actual column name as it appears in the frame.
	    actual_by_key = {}
	    for column in df.columns:
	        actual_by_key[column.strip().lower()] = column

	    wanted_keys = (name.strip().lower() for name in candidates)
	    return next(
	        (actual_by_key[key] for key in wanted_keys if key in actual_by_key),
	        None,
	    )

	def _kblocks_to_tib(kblocks):
	    """Convert a count of 1K blocks to TiB (displayed as "TB" in the UI)."""
	    blocks_per_tib = 1024 ** 3
	    return kblocks / blocks_per_tib

	def _kblocks_to_gib(kblocks):
	    """Convert a count of 1K blocks to GiB (displayed as "GB" in the UI)."""
	    blocks_per_gib = 1024 ** 2
	    return kblocks / blocks_per_gib

	def plot_disks(alert_threshold_pct=99.0):
	    """Build the horizontal disk-usage chart (percent used per disk).

	    Downloads ``disks.csv`` from the ``pluslab/PLUS_Lab_GPUs_Data`` dataset
	    (forcing a re-download on every call), resolves the column names
	    case-insensitively, derives usage percentages and sizes, and draws
	    three overlaid bar traces per disk: a faint 100% background, the used
	    share and the available share.

	    Args:
	        alert_threshold_pct: Disks whose used percentage is strictly greater
	            than this value get a "⚠ " label prefix and the alert colours.

	    Returns:
	        ``(fig, df)``: the Plotly figure and the processed table, including
	        the derived columns ``Label``, ``Total_kb``, ``Used_pct``,
	        ``Avail_pct``, ``Used_TB``, ``Total_TB``, ``Avail_GB`` and ``ALERT``.

	    Raises:
	        ValueError: If the server, filesystem, blocks, used or available
	            column cannot be found.
	    """
	    df = datasets.load_dataset(
	        "pluslab/PLUS_Lab_GPUs_Data",
	        data_files="disks.csv",
	        download_mode="force_redownload",
	    )["train"].to_pandas()

	    if "Unnamed: 0" in df.columns:
	        df = df.drop(columns=["Unnamed: 0"])

	    server_col = _pick_col(df, ["server"])
	    fs_col = _pick_col(df, ["filesystem"])
	    blocks_col = _pick_col(df, ["1k-blocks", "1k blocks", "blocks"])
	    used_col = _pick_col(df, ["used"])
	    avail_col = _pick_col(df, ["available", "avail"])
	    # The mount column is optional; everything below that needs it goes
	    # through mount_col instead of assuming the literal name "Mounted".
	    mount_col = _pick_col(df, ["mounted", "mounted on", "mount", "mountpoint"])

	    required = [server_col, fs_col, blocks_col, used_col, avail_col]
	    if any(c is None for c in required):
	        raise ValueError(f"Missing required columns. Found: {list(df.columns)}")

	    # Keep a single /data2 row (the first one), placed after all other rows.
	    if mount_col is not None:
	        is_data2 = df[mount_col] == "/data2"
	        df = pd.concat(
	            [df[~is_data2], df[is_data2].drop_duplicates(subset=[mount_col])]
	        )

	    for c in [blocks_col, used_col, avail_col]:
	        df[c] = pd.to_numeric(df[c], errors="coerce")

	    # Label: "<server> • <mount point>", falling back to the filesystem name.
	    label_source = mount_col if mount_col is not None else fs_col
	    df["Label"] = df[server_col].astype(str) + " • " + df[label_source].astype(str)

	    # Totals & pct. The total is used + available rather than the blocks
	    # column, so the two percentages always add up to 100.
	    df["Total_kb"] = df[used_col] + df[avail_col]
	    df["Used_pct"] = (df[used_col] / df["Total_kb"]) * 100.0
	    df["Used_pct"] = df["Used_pct"].clip(0, 100)
	    df["Avail_pct"] = (100.0 - df["Used_pct"]).clip(0, 100)

	    # Sizes
	    df["Used_TB"] = _kblocks_to_tib(df[used_col])
	    df["Total_TB"] = _kblocks_to_tib(df["Total_kb"])
	    df["Avail_GB"] = _kblocks_to_gib(df[avail_col])  # GB for hovers

	    # Alerts
	    df["ALERT"] = df["Used_pct"] > alert_threshold_pct

	    # Sort by mount point (descending); without a mount column the label
	    # is the closest equivalent.
	    sort_col = mount_col if mount_col is not None else "Label"
	    df = df.sort_values(sort_col, ascending=False).reset_index(drop=True)

	    alert_mask = df["ALERT"].to_numpy()
	    y_labels = np.where(alert_mask, "⚠ " + df["Label"], df["Label"])

	    used_text = [f"{u:.2f} TB ({p:.0f}%)" for u, p in zip(df["Used_TB"], df["Used_pct"])]

	    # Pre-formatted strings shared by the hover templates and annotations.
	    # They stay pandas-derived arrays so that np.stack below can combine
	    # them with the numeric percentage without turning it into text.
	    used_tb_text = df["Used_TB"].map(lambda v: f"{v:.2f} TB").to_numpy()
	    total_tb_text = df["Total_TB"].map(lambda v: f"{v:.2f} TB").to_numpy()
	    avail_gb_text = df["Avail_GB"].map(lambda v: f"{v:.0f} GB").to_numpy()

	    # Colors
	    COLOR_TOTAL = "#CBD5E1"
	    COLOR_USED = "#2563EB"
	    COLOR_FREE = "#94A3B8"
	    COLOR_ALERT = "#F59E0B"
	    used_colors = np.where(alert_mask, COLOR_ALERT, COLOR_USED)

	    fig = go.Figure()

	    # Gray 100% background; hover shows the available space in GB.
	    fig.add_trace(
	        go.Bar(
	            y=y_labels,
	            x=[100] * len(df),
	            base=0,
	            orientation="h",
	            marker=dict(color=COLOR_TOTAL),
	            opacity=0.40,
	            hovertemplate="<b>%{y}</b><br>Available: %{customdata}<br><extra></extra>",
	            customdata=list(avail_gb_text),
	            showlegend=False,
	        )
	    )

	    # Used share, starting at 0%.
	    fig.add_trace(
	        go.Bar(
	            y=y_labels,
	            x=df["Used_pct"],
	            base=0,
	            name=f"Used (>{alert_threshold_pct:.0f}% highlighted)",
	            orientation="h",
	            marker=dict(color=used_colors),
	            text=used_text,
	            textposition="inside",
	            insidetextanchor="middle",
	            hovertemplate=(
	                "<b>%{y}</b><br>"
	                "Used: %{customdata[0]} (%{customdata[3]:.2f}%)<br>"
	                "Available: %{customdata[1]}<br>"
	                "Total: %{customdata[2]}<br>"
	                "<extra></extra>"
	            ),
	            customdata=np.stack(
	                [
	                    used_tb_text,
	                    avail_gb_text,
	                    total_tb_text,
	                    df["Used_pct"].to_numpy(),
	                ],
	                axis=1,
	            ),
	        )
	    )

	    # Available share, stacked right after the used share via base=.
	    fig.add_trace(
	        go.Bar(
	            y=y_labels,
	            x=df["Avail_pct"],
	            base=df["Used_pct"],
	            name="Available",
	            orientation="h",
	            marker=dict(color=COLOR_FREE),
	            hovertemplate=(
	                "<b>%{y}</b><br>"
	                "Available: %{customdata[0]}<br>"
	                "Used: %{customdata[1]}<br>"
	                "Total: %{customdata[2]}<br>"
	                "<extra></extra>"
	            ),
	            customdata=np.stack(
	                [
	                    avail_gb_text,
	                    used_tb_text,
	                    total_tb_text,
	                ],
	                axis=1,
	            ),
	        )
	    )

	    # Total size annotation (TB, 2dp) just right of the 100% mark.
	    for y, ttxt, is_alert in zip(y_labels, total_tb_text, alert_mask):
	        fig.add_annotation(
	            x=100,
	            y=y,
	            text=ttxt,
	            showarrow=False,
	            xanchor="left",
	            yanchor="middle",
	            xshift=6,
	            font=dict(color=("#B45309" if is_alert else "#334155")),
	        )

	    fig.update_layout(
	        barmode="overlay",
	        template="plotly_white",
	        title=f"Disk usage (alerts: Used > {alert_threshold_pct:.0f}%)",
	        xaxis=dict(range=[0, 100], ticksuffix="%", title="Percent of total"),
	        yaxis_title="",
	        # Grow the figure with the number of disks, never below 420 px.
	        height=max(420, 28 * len(df)),
	        margin=dict(l=280, r=120, t=60, b=40),
	        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
	        barcornerradius=4,
	    )
	    fig.update_yaxes(autorange="reversed")
	    return fig, df


	def plot_users_disks(topk_to_show=5):
	    """Build the per-user disk usage heatmap (GB per server path and user).

	    Downloads ``users_disks.csv`` from the ``pluslab/PLUS_Lab_GPUs_Data``
	    dataset (forcing a re-download on every call), sums ``size_GB`` per
	    (server path, user), keeps only users that are among the largest
	    consumers of at least one path, and adds a "Total" row.

	    Args:
	        topk_to_show: Number of largest non-zero users kept per server path;
	            the displayed users are the union over all paths.

	    Returns:
	        ``(fig, df_return)``: the Plotly heatmap and the loaded table with
	        the added ``server_path`` column (rows whose path is "home" are
	        still included in this table, but not in the heatmap).
	    """
	    df = datasets.load_dataset(
	        "pluslab/PLUS_Lab_GPUs_Data",
	        data_files="users_disks.csv",
	        download_mode="force_redownload",
	    )["train"].to_pandas()

	    df["server_path"] = df["server"] + "/" + df["path"]
	    df["server_path"] = df["server_path"].apply(
	        lambda x: x.replace(".cs.ucla.edu", "")
	    )
	    df_return = df.copy()
	    # Explicit copy: a column is assigned on the filtered frame right below,
	    # which must not be a view of the original.
	    df = df[df["path"] != "home"].copy()

	    # Ensure numeric; unparsable sizes count as 0.
	    df["size_GB"] = pd.to_numeric(df["size_GB"], errors="coerce").fillna(0)

	    # pivot_table sorts its index, so remember the order of first appearance.
	    original_order = df["server_path"].drop_duplicates()

	    heatmap_df = df.pivot_table(
	        index="server_path",
	        columns="username",
	        values="size_GB",
	        aggfunc="sum",
	        fill_value=0,
	    )
	    heatmap_df = heatmap_df.reindex(original_order)

	    # Group the rows: "/home" paths, then "/local1" paths, then the rest,
	    # keeping the order of appearance inside each group. Each path lands in
	    # exactly one group, so no row can be duplicated by the reindex.
	    home_rows, local1_rows, other_rows = [], [], []
	    for path in heatmap_df.index:
	        if "/home" in path:
	            home_rows.append(path)
	        elif "/local1" in path:
	            local1_rows.append(path)
	        else:
	            other_rows.append(path)
	    heatmap_df = heatmap_df.reindex(home_rows + local1_rows + other_rows)

	    # Top-K users per server_path (ignoring zeros), then union across rows.
	    selected_users = set()
	    for _, usage in heatmap_df.iterrows():
	        selected_users.update(usage[usage > 0].nlargest(topk_to_show).index)

	    # Keep only the selected users; copy so the Total row can be added safely.
	    kept_columns = heatmap_df.columns.intersection(pd.Index(sorted(selected_users)))
	    heatmap_df = heatmap_df.loc[:, kept_columns].copy()

	    # Total row is computed after the user selection, so it only sums the
	    # displayed users.
	    heatmap_df.loc["Total"] = heatmap_df.sum(axis=0)

	    # Sort users by total usage (descending).
	    heatmap_df = heatmap_df[
	        heatmap_df.loc["Total"].sort_values(ascending=False).index
	    ]

	    # Reverse the row order and put "Total" first in the list.
	    # NOTE(review): presumably Plotly draws the first y category at the
	    # bottom, which would put Total on the bottom row -- confirm visually.
	    non_total_rows = [r for r in heatmap_df.index if r != "Total"]
	    heatmap_df = heatmap_df.reindex(["Total"] + non_total_rows[::-1])

	    # Text labels inside cells (one decimal).
	    text_values = heatmap_df.round(1).astype(str)

	    fig = go.Figure(
	        data=go.Heatmap(
	            z=heatmap_df.values,
	            x=heatmap_df.columns.tolist(),
	            y=heatmap_df.index.tolist(),
	            text=text_values.values,
	            texttemplate="%{text}",
	            textfont={"size": 12},
	            hovertemplate="path=%{y}<br>user=%{x}<br>size_GB=%{z:.2f}<extra></extra>",
	            colorscale="YlOrRd",
	        )
	    )

	    fig.update_layout(
	        title="Users Disk Usage Heatmap (GB)",
	        xaxis_title="username",
	        yaxis_title="server_path",
	    )

	    return fig, df_return


	##### PLOT ALL #####
	def plot_figs():
	    """Build all three dashboard figures and their backing tables.

	    Each plot is attempted independently: when loading or drawing one of
	    them raises, the traceback is printed and that figure/table pair is
	    returned as ``None``, while the remaining plots are still produced.

	    Returns:
	        The 6-tuple ``(fig_gpus, fig_disks, fig_users_disks, df_gpus,
	        df_disks, df_users_disks)``.
	    """
	    import traceback

	    def _attempt(label, build):
	        """Run one plot builder; return (None, None) if it raises."""
	        try:
	            return build()
	        except Exception:  # UI boundary: one broken panel must not blank the rest
	            print(f"[plot_figs] {label} plot failed:")
	            traceback.print_exc()
	            return None, None

	    # Lambdas defer each call so that it runs inside _attempt's try block.
	    fig_gpus, df_gpus = _attempt("GPU", lambda: plot_gpus())
	    fig_disks, df_disks = _attempt("disk", lambda: plot_disks())
	    fig_users_disks, df_users_disks = _attempt("users-disk", lambda: plot_users_disks())
	    return fig_gpus, fig_disks, fig_users_disks, df_gpus, df_disks, df_users_disks

	# Titles of the three dashboard panels. Every panel contributes one plot and
	# one details table; the outputs are ordered plots first, then tables, to
	# match the tuple returned by plot_figs.
	_PANEL_TITLES = ("GPU Status", "Disk Status", "Users Disk Status")

	_plot_outputs = [
	    gr.Plot(label=title, elem_classes="plotcss") for title in _PANEL_TITLES
	]
	_table_outputs = [
	    gr.Dataframe(label=f"{title} Details") for title in _PANEL_TITLES
	]

	# The interface takes no inputs; plot_figs supplies all six outputs.
	# NOTE(review): delete_cache=(1, 1) is presumably (frequency, age) in
	# seconds -- confirm against the Gradio documentation.
	demo = gr.Interface(
	    fn=plot_figs,
	    inputs=[],
	    outputs=_plot_outputs + _table_outputs,
	    live=True,
	    flagging_options=[],
	    css=".plotcss {max-width: 820px !important;}",
	    delete_cache=(1, 1),
	)

	if __name__ == "__main__":
	    demo.launch(debug=False)