test_test / trackio /ui /system_page.py
abidlabs's picture
abidlabs HF Staff
Upload folder using huggingface_hub
1834e19 verified
"""The System Metrics page for the Trackio UI (GPU metrics, etc.)."""
import gradio as gr
import pandas as pd
import trackio.utils as utils
from trackio.sqlite_storage import SQLiteStorage
from trackio.ui import fns
from trackio.ui.components.colored_checkbox import ColoredCheckboxGroup
from trackio.ui.helpers.run_selection import RunSelection
def get_runs(project) -> list[str]:
if not project:
return []
return SQLiteStorage.get_runs(project)
def refresh_runs(
project: str | None,
filter_text: str | None,
selection: RunSelection,
):
if project is None:
runs: list[str] = []
else:
runs = get_runs(project)
if filter_text:
runs = [r for r in runs if filter_text in r]
did_change = selection.update_choices(runs)
return (
fns.run_checkbox_update(selection) if did_change else gr.skip(),
gr.Textbox(label=f"Runs ({len(runs)})"),
selection,
)
def load_system_data(
project: str | None,
run: str | None,
) -> pd.DataFrame | None:
if not project or not run:
return None
logs = SQLiteStorage.get_system_logs(project, run)
if not logs:
return None
df = pd.DataFrame(logs)
if "timestamp" in df.columns:
df["timestamp"] = pd.to_datetime(df["timestamp"])
first_timestamp = df["timestamp"].min()
df["time"] = (df["timestamp"] - first_timestamp).dt.total_seconds()
df["run"] = run
return df
with gr.Blocks() as system_page:
with gr.Sidebar() as sidebar:
logo = fns.create_logo()
project_dd = fns.create_project_dropdown()
with gr.Group():
run_tb = gr.Textbox(label="Runs", placeholder="Type to filter...")
run_cb = ColoredCheckboxGroup(choices=[], colors=[], label="Runs")
gr.HTML("<hr>")
realtime_cb = gr.Checkbox(label="Refresh metrics realtime", value=True)
smoothing_slider = gr.Slider(
label="Smoothing Factor",
minimum=0,
maximum=20,
value=0,
step=1,
info="0 = no smoothing",
)
navbar = fns.create_navbar()
timer = gr.Timer(value=1)
run_selection_state = gr.State(RunSelection())
x_lim = gr.State(None)
last_system_update = gr.State({})
def toggle_timer(cb_value):
if cb_value:
return gr.Timer(active=True)
else:
return gr.Timer(active=False)
def update_x_lim(select_data: gr.SelectData):
return select_data.index
def check_system_metrics_update(project: str | None, runs: list[str]) -> dict:
if not project or not runs:
return {}
result = {}
for run in runs:
logs = SQLiteStorage.get_system_logs(project, run)
result[run] = len(logs) if logs else 0
return result
@gr.render(
triggers=[
system_page.load,
run_cb.change,
last_system_update.change,
smoothing_slider.change,
x_lim.change,
],
inputs=[
project_dd,
run_cb,
smoothing_slider,
x_lim,
run_selection_state,
],
show_progress="hidden",
queue=False,
)
def update_system_dashboard(
project,
runs,
smoothing_granularity,
x_lim_value,
selection,
):
dfs = []
original_runs = runs.copy() if runs else []
for run in runs:
df = load_system_data(project, run)
if df is not None:
dfs.append(df)
if not dfs:
if not SQLiteStorage.has_system_metrics(project) if project else True:
gr.Markdown(
"""
## No System Metrics Available
System metrics (GPU) will appear here once logged. To enable automatic GPU logging:
```python
import trackio
# GPU logging is auto-enabled when nvidia-ml-py is installed and a GPU is detected
run = trackio.init(project="my-project")
# Or explicitly enable it:
run = trackio.init(project="my-project", auto_log_gpu=True)
# You can also manually log GPU metrics:
trackio.log_gpu()
```
"""
)
else:
gr.Markdown("*Select runs to view system metrics*")
return
master_df = pd.concat(dfs, ignore_index=True)
if master_df.empty:
gr.Markdown("*No system metrics found for selected runs*")
return
x_column = "time"
numeric_cols = master_df.select_dtypes(include="number").columns
numeric_cols = [c for c in numeric_cols if c not in ["time", "timestamp"]]
if smoothing_granularity > 0:
window_size = max(3, min(smoothing_granularity, len(master_df)))
for col in numeric_cols:
master_df[col] = master_df.groupby("run")[col].transform(
lambda x: x.rolling(
window=window_size, center=True, min_periods=1
).mean()
)
ordered_groups, nested_metric_groups = utils.order_metrics_by_plot_preference(
list(numeric_cols)
)
all_runs = selection.choices if selection else original_runs
color_map = utils.get_color_mapping(all_runs, False)
metric_idx = 0
for group_name in ordered_groups:
group_data = nested_metric_groups[group_name]
total_plot_count = sum(
1
for m in group_data["direct_metrics"]
if not master_df.dropna(subset=[m]).empty
) + sum(
sum(1 for m in metrics if not master_df.dropna(subset=[m]).empty)
for metrics in group_data["subgroups"].values()
)
group_label = (
f"{group_name} ({total_plot_count})"
if total_plot_count > 0
else group_name
)
with gr.Accordion(
label=group_label,
open=True,
key=f"sys-accordion-{group_name}",
preserved_by_key=["value", "open"],
):
if group_data["direct_metrics"]:
with gr.Draggable(
key=f"sys-row-{group_name}-direct", orientation="row"
):
for metric_name in group_data["direct_metrics"]:
metric_df = master_df.dropna(subset=[metric_name])
color = "run" if "run" in metric_df.columns else None
downsampled_df, updated_x_lim = utils.downsample(
metric_df,
x_column,
metric_name,
color,
x_lim_value,
)
if not metric_df.empty:
plot = gr.LinePlot(
downsampled_df,
x=x_column,
y=metric_name,
x_title="Time (seconds)",
y_title=metric_name.split("/")[-1],
color=color,
color_map=color_map,
colors_in_legend=original_runs,
title=metric_name,
key=f"sys-plot-{metric_idx}",
preserved_by_key=None,
buttons=["fullscreen", "export"],
x_lim=updated_x_lim,
min_width=400,
)
plot.select(
update_x_lim,
outputs=x_lim,
key=f"sys-select-{metric_idx}",
)
plot.double_click(
lambda: None,
outputs=x_lim,
key=f"sys-double-{metric_idx}",
)
metric_idx += 1
if group_data["subgroups"]:
for subgroup_name in sorted(group_data["subgroups"].keys()):
subgroup_metrics = group_data["subgroups"][subgroup_name]
subgroup_plot_count = sum(
1
for m in subgroup_metrics
if not master_df.dropna(subset=[m]).empty
)
subgroup_label = (
f"{subgroup_name} ({subgroup_plot_count})"
if subgroup_plot_count > 0
else subgroup_name
)
with gr.Accordion(
label=subgroup_label,
open=True,
key=f"sys-accordion-{group_name}-{subgroup_name}",
preserved_by_key=["value", "open"],
):
with gr.Draggable(
key=f"sys-row-{group_name}-{subgroup_name}",
orientation="row",
):
for metric_name in subgroup_metrics:
metric_df = master_df.dropna(subset=[metric_name])
color = (
"run" if "run" in metric_df.columns else None
)
downsampled_df, updated_x_lim = utils.downsample(
metric_df,
x_column,
metric_name,
color,
x_lim_value,
)
if not metric_df.empty:
plot = gr.LinePlot(
downsampled_df,
x=x_column,
y=metric_name,
x_title="Time (seconds)",
y_title=metric_name.split("/")[-1],
color=color,
color_map=color_map,
colors_in_legend=original_runs,
title=metric_name,
key=f"sys-plot-{metric_idx}",
preserved_by_key=None,
buttons=["fullscreen", "export"],
x_lim=updated_x_lim,
min_width=400,
)
plot.select(
update_x_lim,
outputs=x_lim,
key=f"sys-select-{metric_idx}",
)
plot.double_click(
lambda: None,
outputs=x_lim,
key=f"sys-double-{metric_idx}",
)
metric_idx += 1
gr.on(
[timer.tick],
fn=lambda: gr.Dropdown(info=fns.get_project_info()),
outputs=[project_dd],
show_progress="hidden",
api_visibility="private",
)
gr.on(
[timer.tick],
fn=refresh_runs,
inputs=[project_dd, run_tb, run_selection_state],
outputs=[run_cb, run_tb, run_selection_state],
show_progress="hidden",
api_visibility="private",
)
gr.on(
[timer.tick],
fn=check_system_metrics_update,
inputs=[project_dd, run_cb],
outputs=last_system_update,
show_progress="hidden",
api_visibility="private",
)
gr.on(
[system_page.load],
fn=fns.get_projects,
outputs=project_dd,
show_progress="hidden",
queue=False,
api_visibility="private",
).then(
fns.update_navbar_value,
inputs=[project_dd],
outputs=[navbar],
show_progress="hidden",
api_visibility="private",
queue=False,
)
gr.on(
[system_page.load, project_dd.change],
fn=refresh_runs,
inputs=[project_dd, run_tb, run_selection_state],
outputs=[run_cb, run_tb, run_selection_state],
show_progress="hidden",
queue=False,
api_visibility="private",
).then(
fns.update_navbar_value,
inputs=[project_dd],
outputs=[navbar],
show_progress="hidden",
api_visibility="private",
queue=False,
)
realtime_cb.change(
fn=toggle_timer,
inputs=realtime_cb,
outputs=timer,
api_visibility="private",
queue=False,
)
run_cb.input(
fn=fns.handle_run_checkbox_change,
inputs=[run_cb, run_selection_state],
outputs=run_selection_state,
api_visibility="private",
queue=False,
)
run_tb.input(
fn=refresh_runs,
inputs=[project_dd, run_tb, run_selection_state],
outputs=[run_cb, run_tb, run_selection_state],
api_visibility="private",
queue=False,
show_progress="hidden",
)