Spaces:
Sleeping
Sleeping
| """The System Metrics page for the Trackio UI (GPU metrics, etc.).""" | |
| import gradio as gr | |
| import pandas as pd | |
| import trackio.utils as utils | |
| from trackio.sqlite_storage import SQLiteStorage | |
| from trackio.ui import fns | |
| from trackio.ui.components.colored_checkbox import ColoredCheckboxGroup | |
| from trackio.ui.helpers.run_selection import RunSelection | |
def get_runs(project) -> list[str]:
    """Return the runs stored for ``project``.

    A falsy ``project`` (``None`` or an empty string) produces an empty list
    without querying ``SQLiteStorage``.
    """
    return SQLiteStorage.get_runs(project) if project else []
def refresh_runs(
    project: str | None,
    filter_text: str | None,
    selection: RunSelection,
):
    """Recompute the run list for ``project`` and sync it into ``selection``.

    Args:
        project: Project whose runs are listed; ``None`` means no runs.
        filter_text: When non-empty, only runs containing this substring
            (case-sensitive ``in`` test) are kept.
        selection: Run-selection state; its ``update_choices`` method is
            called with the filtered runs.

    Returns:
        A 3-tuple of (checkbox update or ``gr.skip()``, a ``gr.Textbox``
        whose label shows the run count, the ``selection`` object).
    """
    runs: list[str] = [] if project is None else get_runs(project)
    if filter_text:
        runs = [name for name in runs if filter_text in name]

    # The return value of update_choices is treated as "choices changed";
    # the checkbox group is only re-rendered in that case.
    if selection.update_choices(runs):
        checkbox_update = fns.run_checkbox_update(selection)
    else:
        checkbox_update = gr.skip()

    count_label = gr.Textbox(label=f"Runs ({len(runs)})")
    return checkbox_update, count_label, selection
| def load_system_data( | |
| project: str | None, | |
| run: str | None, | |
| ) -> pd.DataFrame | None: | |
| if not project or not run: | |
| return None | |
| logs = SQLiteStorage.get_system_logs(project, run) | |
| if not logs: | |
| return None | |
| df = pd.DataFrame(logs) | |
| if "timestamp" in df.columns: | |
| df["timestamp"] = pd.to_datetime(df["timestamp"]) | |
| first_timestamp = df["timestamp"].min() | |
| df["time"] = (df["timestamp"] - first_timestamp).dt.total_seconds() | |
| df["run"] = run | |
| return df | |
# NOTE(review): the nesting below is reconstructed from a paste that lost its
# indentation (sidebar contents vs. page-level components) -- confirm against
# the canonical file.
with gr.Blocks() as system_page:
    # Sidebar: logo, project picker, run filter/selection and plot options.
    with gr.Sidebar() as sidebar:
        logo = fns.create_logo()
        project_dd = fns.create_project_dropdown()
        with gr.Group():
            # The textbox doubles as the run filter; refresh_runs rewrites
            # its label with the current run count.
            run_tb = gr.Textbox(placeholder="Type to filter...", label="Runs")
            run_cb = ColoredCheckboxGroup(label="Runs", choices=[], colors=[])
        gr.HTML("<hr>")
        realtime_cb = gr.Checkbox(value=True, label="Refresh metrics realtime")
        smoothing_slider = gr.Slider(
            minimum=0,
            maximum=20,
            step=1,
            value=0,
            label="Smoothing Factor",
            info="0 = no smoothing",
        )

    navbar = fns.create_navbar()

    # Timer whose tick events drive the periodic refresh handlers.
    timer = gr.Timer(value=1)

    # Per-session state holders.
    run_selection_state = gr.State(RunSelection())
    x_lim = gr.State(None)  # current x-axis selection; None means unset
    last_system_update = gr.State({})  # run name -> system-log count
def toggle_timer(cb_value):
    """Return a ``gr.Timer`` update that is active iff ``cb_value`` is truthy."""
    return gr.Timer(active=bool(cb_value))
def update_x_lim(select_data: gr.SelectData):
    """Return the ``index`` of a selection event.

    Wired to plot ``select`` events with the ``x_lim`` state as output, so
    the returned value becomes the new x-axis limits.
    """
    selected_range = select_data.index
    return selected_range
| def check_system_metrics_update(project: str | None, runs: list[str]) -> dict: | |
| if not project or not runs: | |
| return {} | |
| result = {} | |
| for run in runs: | |
| logs = SQLiteStorage.get_system_logs(project, run) | |
| result[run] = len(logs) if logs else 0 | |
| return result | |
def update_system_dashboard(
    project,
    runs,
    smoothing_granularity,
    x_lim_value,
    selection,
):
    """Build the system-metrics plots for the selected runs.

    Components are created as a side effect (Gradio adds them to whatever
    render/Blocks context is active); nothing is returned.

    Args:
        project: Project name; falsy means no project is selected.
        runs: Names of the selected runs. ``None`` or an empty list shows a
            placeholder message instead of plots.
        smoothing_granularity: Rolling-mean window requested by the
            smoothing slider; ``0`` disables smoothing. Windows smaller
            than 3 are raised to 3.
        x_lim_value: Current x-axis limits, forwarded to
            ``utils.downsample``.
        selection: Run-selection state. When truthy, its ``choices`` are
            used for colour assignment; otherwise the selected runs are.

    NOTE(review): the block nesting (subgroup accordions inside the group
    accordion, ``metric_idx`` advanced once per metric whether or not a
    plot was drawn) is reconstructed from a paste that lost indentation --
    confirm against the canonical file.
    """
    # None-safe copy. The loading loop below must iterate this list and not
    # the raw ``runs`` argument, which may be None.
    original_runs = list(runs) if runs else []

    loaded = (load_system_data(project, run) for run in original_runs)
    dfs = [df for df in loaded if df is not None]

    if not dfs:
        if not project or not SQLiteStorage.has_system_metrics(project):
            # Same text as the original literal, spelled line by line so the
            # markdown does not depend on source indentation.
            gr.Markdown(
                "\n"
                "## No System Metrics Available\n"
                "System metrics (GPU) will appear here once logged. "
                "To enable automatic GPU logging:\n"
                "```python\n"
                "import trackio\n"
                "# GPU logging is auto-enabled when nvidia-ml-py is installed "
                "and a GPU is detected\n"
                'run = trackio.init(project="my-project")\n'
                "# Or explicitly enable it:\n"
                'run = trackio.init(project="my-project", auto_log_gpu=True)\n'
                "# You can also manually log GPU metrics:\n"
                "trackio.log_gpu()\n"
                "```\n"
            )
        else:
            gr.Markdown("*Select runs to view system metrics*")
        return

    master_df = pd.concat(dfs, ignore_index=True)
    if master_df.empty:
        gr.Markdown("*No system metrics found for selected runs*")
        return

    x_column = "time"
    numeric_cols = [
        col
        for col in master_df.select_dtypes(include="number").columns
        if col not in ("time", "timestamp")
    ]

    if smoothing_granularity > 0:
        window_size = max(3, min(smoothing_granularity, len(master_df)))

        def smooth(series):
            # Centered rolling mean; min_periods=1 keeps the edge samples.
            return series.rolling(
                window=window_size, center=True, min_periods=1
            ).mean()

        # Smooth each run separately so curves do not bleed into each other.
        for col in numeric_cols:
            master_df[col] = master_df.groupby("run")[col].transform(smooth)

    ordered_groups, nested_metric_groups = utils.order_metrics_by_plot_preference(
        list(numeric_cols)
    )
    all_runs = selection.choices if selection else original_runs
    color_map = utils.get_color_mapping(all_runs, False)

    def has_data(metric_name):
        """Return True when the metric has at least one non-null sample."""
        return bool(master_df[metric_name].notna().any())

    def render_metric_plot(metric_name, plot_index):
        """Draw the line plot for one metric; do nothing if it has no data."""
        metric_df = master_df.dropna(subset=[metric_name])
        if metric_df.empty:
            return
        color = "run" if "run" in metric_df.columns else None
        # Downsample only when a plot will actually be drawn.
        downsampled_df, updated_x_lim = utils.downsample(
            metric_df,
            x_column,
            metric_name,
            color,
            x_lim_value,
        )
        plot = gr.LinePlot(
            downsampled_df,
            x=x_column,
            y=metric_name,
            x_title="Time (seconds)",
            y_title=metric_name.split("/")[-1],
            color=color,
            color_map=color_map,
            colors_in_legend=original_runs,
            title=metric_name,
            key=f"sys-plot-{plot_index}",
            preserved_by_key=None,
            buttons=["fullscreen", "export"],
            x_lim=updated_x_lim,
            min_width=400,
        )
        # Selecting a range stores it in the x_lim state; double-click
        # resets that state to None.
        plot.select(
            update_x_lim,
            outputs=x_lim,
            key=f"sys-select-{plot_index}",
        )
        plot.double_click(
            lambda: None,
            outputs=x_lim,
            key=f"sys-double-{plot_index}",
        )

    metric_idx = 0
    for group_name in ordered_groups:
        group_data = nested_metric_groups[group_name]
        direct_metrics = group_data["direct_metrics"]
        subgroups = group_data["subgroups"]

        total_plot_count = sum(has_data(m) for m in direct_metrics) + sum(
            has_data(m) for metrics in subgroups.values() for m in metrics
        )
        group_label = (
            f"{group_name} ({total_plot_count})"
            if total_plot_count > 0
            else group_name
        )
        with gr.Accordion(
            label=group_label,
            open=True,
            key=f"sys-accordion-{group_name}",
            preserved_by_key=["value", "open"],
        ):
            if direct_metrics:
                with gr.Draggable(
                    key=f"sys-row-{group_name}-direct", orientation="row"
                ):
                    for metric_name in direct_metrics:
                        render_metric_plot(metric_name, metric_idx)
                        metric_idx += 1

            if subgroups:
                for subgroup_name in sorted(subgroups.keys()):
                    subgroup_metrics = subgroups[subgroup_name]
                    subgroup_plot_count = sum(
                        has_data(m) for m in subgroup_metrics
                    )
                    subgroup_label = (
                        f"{subgroup_name} ({subgroup_plot_count})"
                        if subgroup_plot_count > 0
                        else subgroup_name
                    )
                    with gr.Accordion(
                        label=subgroup_label,
                        open=True,
                        key=f"sys-accordion-{group_name}-{subgroup_name}",
                        preserved_by_key=["value", "open"],
                    ):
                        with gr.Draggable(
                            key=f"sys-row-{group_name}-{subgroup_name}",
                            orientation="row",
                        ):
                            for metric_name in subgroup_metrics:
                                render_metric_plot(metric_name, metric_idx)
                                metric_idx += 1
# Event wiring for the system page. These registrations belong inside the
# ``with gr.Blocks() as system_page:`` context opened earlier in the file.

# --- Timer ticks -----------------------------------------------------------
# Refresh the info text of the project dropdown.
gr.on(
    triggers=[timer.tick],
    fn=lambda: gr.Dropdown(info=fns.get_project_info()),
    outputs=[project_dd],
    api_visibility="private",
    show_progress="hidden",
)
# Re-read the run list, applying the current filter text.
gr.on(
    triggers=[timer.tick],
    fn=refresh_runs,
    inputs=[project_dd, run_tb, run_selection_state],
    outputs=[run_cb, run_tb, run_selection_state],
    api_visibility="private",
    show_progress="hidden",
)
# Store per-run system-log counts in the last_system_update state.
gr.on(
    triggers=[timer.tick],
    fn=check_system_metrics_update,
    inputs=[project_dd, run_cb],
    outputs=last_system_update,
    api_visibility="private",
    show_progress="hidden",
)

# --- Page load / project change ---------------------------------------------
# On load: populate the project dropdown, then update the navbar from it.
gr.on(
    triggers=[system_page.load],
    fn=fns.get_projects,
    outputs=project_dd,
    queue=False,
    api_visibility="private",
    show_progress="hidden",
).then(
    fn=fns.update_navbar_value,
    inputs=[project_dd],
    outputs=[navbar],
    queue=False,
    api_visibility="private",
    show_progress="hidden",
)
# On load or project change: refresh the runs, then update the navbar.
gr.on(
    triggers=[system_page.load, project_dd.change],
    fn=refresh_runs,
    inputs=[project_dd, run_tb, run_selection_state],
    outputs=[run_cb, run_tb, run_selection_state],
    queue=False,
    api_visibility="private",
    show_progress="hidden",
).then(
    fn=fns.update_navbar_value,
    inputs=[project_dd],
    outputs=[navbar],
    queue=False,
    api_visibility="private",
    show_progress="hidden",
)

# --- Sidebar controls --------------------------------------------------------
# The realtime checkbox switches the timer on and off.
realtime_cb.change(
    toggle_timer,
    inputs=realtime_cb,
    outputs=timer,
    queue=False,
    api_visibility="private",
)
# User edits of the run checkboxes are folded into the selection state.
run_cb.input(
    fns.handle_run_checkbox_change,
    inputs=[run_cb, run_selection_state],
    outputs=run_selection_state,
    queue=False,
    api_visibility="private",
)
# Typing in the filter box re-filters the run list immediately.
run_tb.input(
    refresh_runs,
    inputs=[project_dd, run_tb, run_selection_state],
    outputs=[run_cb, run_tb, run_selection_state],
    queue=False,
    api_visibility="private",
    show_progress="hidden",
)