Spaces:
Runtime error
Runtime error
Mohsen Fayyaz
committed on
Commit
·
3da2f6b
0
Parent(s):
commit message
Browse files- .gitattributes +35 -0
- README.md +12 -0
- app.py +76 -0
- data/test.txt +1 -0
- gpus.csv +45 -0
- history.pkl.gz +3 -0
- requirements.txt +1 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: PLUS Lab GPUs
|
| 3 |
+
emoji: ➕
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.13.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.express as px
|
| 4 |
+
|
| 5 |
+
def split_multi_users(dfs):
    """Expand rows shared by several users into one row per user.

    The ``username`` column may hold several names joined by ``", "``.
    Each such row is repeated once per name, with ``username`` set to that
    single name and ``count`` set to ``1 / number_of_names`` so the counts
    of one original row always sum to 1.

    Args:
        dfs: DataFrame with a string ``username`` column. It is not modified.

    Returns:
        A new DataFrame with a fresh 0..n-1 index containing the original
        columns plus ``usernames`` (the list of names parsed from the
        original row) and ``count``. An input without rows yields an empty
        frame that still carries all of these columns.
    """
    df = dfs.copy()
    df["usernames"] = df["username"].apply(lambda x: x.split(", "))
    df["count"] = 1
    rows = [
        {**row, "username": username, "count": 1 / len(row["usernames"])}
        for row in df.to_dict(orient="records")
        for username in row["usernames"]
    ]
    # Passing the columns explicitly keeps the schema when `rows` is empty;
    # otherwise the result would have no columns at all and callers that
    # select "username"/"count" would fail with a KeyError.
    return pd.DataFrame(rows, columns=df.columns)
|
| 19 |
+
|
| 20 |
+
def plot_now():
    """Build the bar chart of the current GPU assignment.

    Reads ``gpus.csv`` from the Space repository, drops the unnamed leading
    column, replaces missing values with ``"FREE"`` and draws one horizontal
    stacked bar per server, coloured by user.

    Returns:
        A ``(figure, dataframe)`` tuple: the plotly figure and the cleaned
        table (before multi-user rows are split).
    """
    status = pd.read_csv("hf://spaces/mohsenfayyaz/PLUS_Lab_GPUs/gpus.csv")
    status = status.drop(columns=["Unnamed: 0"]).fillna("FREE")
    per_user = split_multi_users(status)

    # Text shown inside every bar segment: user name above the device id.
    bar_labels = per_user['username'].astype(str) + "<br>" + per_user['device'].astype(str)
    # Reversed order of first appearance, used as the y-axis category order.
    server_order = per_user["server"].unique()[::-1]

    fig = px.bar(
        per_user,
        x="count",
        y="server",
        color="username",
        title=f"Last Updated {min(status['timestamp'])}",
        color_discrete_map={"FREE": "black"},
        text=bar_labels,
    )
    fig.update_layout(
        yaxis={'categoryorder': 'array', 'categoryarray': server_order},
        barcornerradius=50,
    )
    fig.update_traces(textposition='inside', insidetextanchor='middle')
    return fig, status
|
| 39 |
+
|
| 40 |
+
def plot_history():
    """Build the stacked area chart of GPU usage over time.

    Loads the pickled history table from the Space repository, replaces
    missing values with ``"FREE"``, splits shared GPUs between their users
    and sums the per-user ``count`` for every ``polling_timestamp``.

    Returns:
        A ``(figure, dataframe)`` tuple: the plotly figure and the
        aggregated table it was drawn from.
    """
    # NOTE(review): read_pickle executes whatever the pickle contains; this
    # is only safe while the remote file is under the maintainers' control.
    history = pd.read_pickle("hf://spaces/mohsenfayyaz/PLUS_Lab_GPUs/history.pkl.gz")
    history = split_multi_users(history.fillna("FREE"))
    usage = (
        history[["polling_timestamp", "username", "count"]]
        .groupby(["polling_timestamp", "username"])
        .sum()
        .reset_index()
        .sort_values(by=["polling_timestamp", "count"], ascending=False)
    )
    fig = px.area(
        usage,
        x="polling_timestamp",
        y="count",
        color='username',
        color_discrete_map={"FREE": "black"},
        markers=True,
    )
    return fig, usage
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def plot_figs():
    """Produce the three outputs shown by the interface.

    The current-status plot is required, so errors from ``plot_now``
    propagate. The history plot is best effort: if it cannot be built the
    error is printed and ``None`` is returned in its place.

    Returns:
        ``(fig_now, dfn, fig_history)`` - the current-status figure, its
        table, and the history figure or ``None``.
    """
    fig_now, dfn = plot_now()
    try:
        # Only the figure is displayed; the aggregated table is not needed.
        fig_history, _ = plot_history()
    except Exception as e:
        # Deliberately broad: a broken history file must not take down the
        # live status view. repr() keeps the exception type in the log.
        print(f"plot_history failed: {e!r}")
        fig_history = None
    return fig_now, dfn, fig_history
|
| 61 |
+
|
| 62 |
+
# Gradio UI: no inputs; three outputs fed by plot_figs() in this order -
# current status plot, status table, history plot.
demo = gr.Interface(
    fn=plot_figs,
    inputs=[],
    outputs=[
        gr.Plot(label="GPU Status", elem_classes="plotcss"),
        gr.Dataframe(label="GPU Status Details"),
        gr.Plot(label="History", elem_classes="plotcss"),
    ],
    live=True,
    flagging_options=[],
    # Caps the width of both plots via the shared "plotcss" class.
    css=".plotcss {max-width: 820px !important;}",
)

if __name__ == "__main__":
    demo.launch(debug=True)
|
data/test.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
test
|
gpus.csv
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,timestamp,server,name,device,utilization.gpu [%],utilization.memory [%],username
|
| 2 |
+
0,2025/01/30 15:52:03.165,pluslab-a100.cs.ucla.edu,NVIDIA A100-SXM4-40GB,0,100 %,8 %,fabricehc
|
| 3 |
+
1,2025/01/30 15:52:03.189,pluslab-a100.cs.ucla.edu,NVIDIA A100-SXM4-40GB,1,0 %,0 %,fabricehc
|
| 4 |
+
2,2025/01/30 15:52:03.210,pluslab-a100.cs.ucla.edu,NVIDIA A100-SXM4-40GB,2,0 %,0 %,fabricehc
|
| 5 |
+
3,2025/01/30 15:52:03.226,pluslab-a100.cs.ucla.edu,NVIDIA A100-SXM4-40GB,3,0 %,0 %,fabricehc
|
| 6 |
+
0,2025/01/30 15:52:04.328,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,0,100 %,32 %,lucasbandarkar
|
| 7 |
+
1,2025/01/30 15:52:04.343,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,1,1 %,0 %,lucasbandarkar
|
| 8 |
+
2,2025/01/30 15:52:04.355,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,2,92 %,30 %,tparekh
|
| 9 |
+
3,2025/01/30 15:52:04.363,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,3,100 %,42 %,lucasbandarkar
|
| 10 |
+
4,2025/01/30 15:52:04.372,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,4,99 %,44 %,hzhang19
|
| 11 |
+
5,2025/01/30 15:52:04.382,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,5,99 %,46 %,hzhang19
|
| 12 |
+
6,2025/01/30 15:52:04.390,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,6,99 %,37 %,hzhang19
|
| 13 |
+
7,2025/01/30 15:52:04.397,pluslab01-a100.cs.ucla.edu,NVIDIA A100 80GB PCIe,7,99 %,34 %,hzhang19
|
| 14 |
+
0,2025/01/30 15:52:05.536,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,0,89 %,39 %,sidilu
|
| 15 |
+
1,2025/01/30 15:52:05.558,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,1,97 %,42 %,sidilu
|
| 16 |
+
2,2025/01/30 15:52:05.574,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,2,98 %,39 %,sidilu
|
| 17 |
+
3,2025/01/30 15:52:05.589,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,3,100 %,44 %,sidilu
|
| 18 |
+
4,2025/01/30 15:52:05.605,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,4,100 %,44 %,sidilu
|
| 19 |
+
5,2025/01/30 15:52:05.623,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,5,98 %,39 %,sidilu
|
| 20 |
+
6,2025/01/30 15:52:05.639,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,6,98 %,38 %,sidilu
|
| 21 |
+
7,2025/01/30 15:52:05.655,pluslab01-h100.cs.ucla.edu,NVIDIA H100 80GB HBM3,7,100 %,43 %,sidilu
|
| 22 |
+
0,2025/01/30 15:52:07.066,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,0,12 %,3 %,fabricehc
|
| 23 |
+
1,2025/01/30 15:52:07.072,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,1,0 %,0 %,
|
| 24 |
+
2,2025/01/30 15:52:07.077,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,2,0 %,0 %,tparekh
|
| 25 |
+
3,2025/01/30 15:52:07.082,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,3,87 %,40 %,tparekh
|
| 26 |
+
4,2025/01/30 15:52:07.086,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,4,97 %,97 %,meithnav
|
| 27 |
+
5,2025/01/30 15:52:07.092,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,5,70 %,38 %,"tparekh, meithnav"
|
| 28 |
+
6,2025/01/30 15:52:07.096,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,6,100 %,86 %,"tparekh, meithnav"
|
| 29 |
+
7,2025/01/30 15:52:07.101,pluslab-a6000.cs.ucla.edu,NVIDIA RTX A6000,7,96 %,96 %,"tparekh, meithnav"
|
| 30 |
+
0,2025/01/30 15:52:08.238,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,0,95 %,86 %,yunzhi
|
| 31 |
+
1,2025/01/30 15:52:08.244,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,1,83 %,43 %,tparekh
|
| 32 |
+
2,2025/01/30 15:52:08.247,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,2,80 %,46 %,tparekh
|
| 33 |
+
3,2025/01/30 15:52:08.249,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,3,0 %,0 %,tparekh
|
| 34 |
+
4,2025/01/30 15:52:08.253,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,4,0 %,0 %,
|
| 35 |
+
5,2025/01/30 15:52:08.257,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,5,94 %,96 %,yunzhi
|
| 36 |
+
6,2025/01/30 15:52:08.261,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,6,0 %,0 %,
|
| 37 |
+
7,2025/01/30 15:52:08.265,pluslab01-a6000.cs.ucla.edu,NVIDIA RTX A6000,7,0 %,0 %,lucasbandarkar
|
| 38 |
+
0,2025/01/30 15:52:09.990,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,0,95 %,95 %,meithnav
|
| 39 |
+
1,2025/01/30 15:52:10.013,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,1,97 %,100 %,meithnav
|
| 40 |
+
2,2025/01/30 15:52:10.030,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,2,97 %,100 %,meithnav
|
| 41 |
+
3,2025/01/30 15:52:10.048,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,3,98 %,100 %,meithnav
|
| 42 |
+
4,2025/01/30 15:52:10.070,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,4,99 %,100 %,hzhang19
|
| 43 |
+
5,2025/01/30 15:52:10.089,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,5,99 %,71 %,"tparekh, hzhang19"
|
| 44 |
+
6,2025/01/30 15:52:10.117,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,6,25 %,9 %,"tparekh, hzhang19"
|
| 45 |
+
7,2025/01/30 15:52:10.133,pluslab02-a6000.cs.ucla.edu,NVIDIA RTX A6000,7,71 %,39 %,tparekh
|
history.pkl.gz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51419de7243376d764d9934d8f886c6fa6cd0f240e32f5b7626da757a1da0b45
|
| 3 |
+
size 213660
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
plotly
|