# pip install dash plotly nvidia-ml-py dash-daq
"""Real-time GPU metrics dashboard.

A small Dash application that polls NVML once per interval tick for the
selected GPU and shows three rolling line charts (memory in GB, memory in
percent, utilization in percent), each with Current/Min/Max/Avg LED displays.

Usage:
    python <this_script> <last_octet>

The server binds to ``10.119.2.<last_octet>`` on port 8050.  When no octet is
given it falls back to ``127.0.0.1``.
"""
import sys
from collections import defaultdict, deque
from contextlib import contextmanager

import dash
import dash_daq as daq  # For LED displays
import nvidia_smi
import plotly.graph_objs as go
from dash import dcc, html
from dash.dependencies import Input, Output

# Number of data points kept per GPU; older samples fall off the left end.
max_history_length = 100

# Rolling history for each GPU, keyed by the GPU index chosen in the dropdown.
gpu_histories = defaultdict(lambda: {
    "time_history": deque(maxlen=max_history_length),
    "memory_history": deque(maxlen=max_history_length),
    "utilization_history": deque(maxlen=max_history_length),
    "memory_percentage_history": deque(maxlen=max_history_length),
})

# Plot template
plot_template = "ggplot2"

BYTES_PER_GB = 1024 ** 3
BYTES_PER_MB = 1024 ** 2
LED_COLOR = '#FF5E5E'
DEFAULT_GPU_NAME = "Unknown GPU"
HOST_PREFIX = "10.119.2."
FALLBACK_HOST = "127.0.0.1"

# (label shown next to the LED, id suffix) in display order.
_STAT_FIELDS = (
    ("Current:", "current"),
    ("Min:", "min"),
    ("Max:", "max"),
    ("Avg:", "avg"),
)

# (section heading, LED id prefix, graph id) in display order.  The layout AND
# the callback output list are both derived from this table so that they can
# never drift apart.
_SECTIONS = (
    ("GPU Memory Usage (GB)", "memory", "gpu-memory-graph"),
    ("GPU Memory Usage (%)", "memory-percentage", "gpu-memory-percentage-graph"),
    ("GPU Utilization (%)", "utilization", "gpu-utilization-graph"),
)


@contextmanager
def _nvml_session():
    """Initialise NVML for the duration of the ``with`` body and always shut it down."""
    nvidia_smi.nvmlInit()
    try:
        yield
    finally:
        # Runs on success and on error, so a failed query no longer leaves
        # NVML initialised.
        nvidia_smi.nvmlShutdown()


def _decode_name(raw_name):
    """Return the GPU name as ``str``; some binding versions hand back ``bytes``."""
    if isinstance(raw_name, bytes):
        return raw_name.decode("utf-8", errors="replace")
    return raw_name


def get_gpu_metrics(device_id):
    """Query memory usage, utilization and name for one GPU.

    Args:
        device_id: NVML device index, or ``None`` when nothing is selected.

    Returns:
        A 6-tuple ``(allocated_memory, allocated_memory_MB, free_memory,
        total_memory, gpu_utilization, gpu_name)``.  Memory values are in GB
        except ``allocated_memory_MB`` (MB); utilization is a percentage.
        When the query fails (or ``device_id`` is ``None``) every number is 0
        and the name is ``"Unknown GPU"``; the NVML error is printed rather
        than raised.
    """
    # Every returned name is initialised up front so the error path can always
    # return (allocated_memory_MB used to be unbound when NVML raised).
    allocated_memory = 0
    allocated_memory_MB = 0
    free_memory = 0
    total_memory = 0
    gpu_utilization = 0
    gpu_name = DEFAULT_GPU_NAME

    if device_id is None:
        # The dropdown can be cleared by the user; there is nothing to query.
        return (allocated_memory, allocated_memory_MB, free_memory,
                total_memory, gpu_utilization, gpu_name)

    try:
        with _nvml_session():
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_id)
            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            util_info = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
            gpu_name = _decode_name(nvidia_smi.nvmlDeviceGetName(handle))

            used_bytes = mem_info.total - mem_info.free
            total_memory = mem_info.total / BYTES_PER_GB
            free_memory = mem_info.free / BYTES_PER_GB
            allocated_memory = used_bytes / BYTES_PER_GB
            allocated_memory_MB = used_bytes / BYTES_PER_MB
            gpu_utilization = util_info.gpu  # GPU utilization in percentage
    except nvidia_smi.NVMLError as error:
        print(f"Error fetching GPU metrics: {error}")

    return (allocated_memory, allocated_memory_MB, free_memory,
            total_memory, gpu_utilization, gpu_name)


def get_gpu_list():
    """Return dropdown options (``{"label", "value"}`` dicts) for every visible GPU.

    Returns the options collected so far (possibly an empty list) when NVML
    reports an error; the error is printed rather than raised.
    """
    gpu_list = []
    try:
        with _nvml_session():
            for index in range(nvidia_smi.nvmlDeviceGetCount()):
                handle = nvidia_smi.nvmlDeviceGetHandleByIndex(index)
                gpu_name = _decode_name(nvidia_smi.nvmlDeviceGetName(handle))
                gpu_list.append({"label": f"GPU {index} ({gpu_name})", "value": index})
    except nvidia_smi.NVMLError as error:
        print(f"Error fetching GPU list: {error}")
    return gpu_list


def _stat_led(label, component_id, is_last):
    """Build one labelled LED display; all but the last get a right margin."""
    style = {'display': 'inline-block'}
    if not is_last:
        style['margin-right'] = '20px'
    return html.Div([
        html.Label(label),
        daq.LEDDisplay(id=component_id, value="0.00", size=20, color=LED_COLOR),
    ], style=style)


def _metric_section(title, id_prefix, graph_id):
    """Build one dashboard section: heading, a row of four LEDs, and a graph."""
    last_index = len(_STAT_FIELDS) - 1
    leds = [
        _stat_led(label, f"{id_prefix}-{suffix}", is_last=(position == last_index))
        for position, (label, suffix) in enumerate(_STAT_FIELDS)
    ]
    return html.Div([
        html.H3(title),
        html.Div(leds, style={'display': 'flex', 'margin-bottom': '20px'}),
        dcc.Graph(id=graph_id),
    ])


# Dash app setup
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Real-Time GPU Metrics"),
    html.H4("Ashish"),
    html.Div([
        html.Label("Select GPU:"),
        dcc.Dropdown(
            id='gpu-dropdown',
            options=get_gpu_list(),
            value=0,  # Default to the first GPU
        ),
    ]),
    *[_metric_section(title, prefix, graph_id) for title, prefix, graph_id in _SECTIONS],
    dcc.Interval(
        id='interval-component',
        interval=1000,  # Milliseconds: update once per second
        n_intervals=0,
    ),
])

# Three figures followed by Current/Min/Max/Avg for each section (15 outputs).
_CALLBACK_OUTPUTS = (
    [Output(graph_id, 'figure') for _, _, graph_id in _SECTIONS]
    + [
        Output(f"{prefix}-{suffix}", 'value')
        for _, prefix, _ in _SECTIONS
        for _, suffix in _STAT_FIELDS
    ]
)


def _calculate_stats(data):
    """Return ``(min, max, mean)`` of *data*, or ``(0, 0, 0)`` when it is empty."""
    if not data:
        return 0, 0, 0
    return min(data), max(data), sum(data) / len(data)


def _led_values(current, history):
    """Format current/min/max/avg of one metric as the four LED strings."""
    lowest, highest, mean = _calculate_stats(history)
    return [f"{value:.2f}" for value in (current, lowest, highest, mean)]


def _empty_figure():
    """Build the placeholder figure shown when no GPU data is available."""
    return go.Figure(
        layout=dict(
            title="No GPU Available",
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            annotations=[dict(
                text="No GPU detected!",
                xref="paper",
                yref="paper",
                showarrow=False,
                font=dict(size=20),
            )],
        )
    )


def _line_figure(x_values, traces, title, yaxis_title, y_range=None, legend_title=None):
    """Build a line chart.

    Args:
        x_values: Shared x coordinates for every trace.
        traces: Iterable of ``(name, y_values, color)`` tuples.
        title: Figure title.
        yaxis_title: Y axis label.
        y_range: Optional fixed ``[low, high]`` y axis range.
        legend_title: Optional legend heading.
    """
    figure = go.Figure()
    for name, y_values, color in traces:
        figure.add_trace(go.Scatter(
            x=x_values,
            y=y_values,
            name=name,
            mode='lines',
            line=dict(color=color),
        ))

    yaxis = dict(type='linear')
    if y_range is not None:
        yaxis['range'] = list(y_range)

    layout_kwargs = dict(title=title, xaxis_title="Time (s)", yaxis_title=yaxis_title)
    if legend_title is not None:
        layout_kwargs['legend_title'] = legend_title
    layout_kwargs.update(template=plot_template, xaxis=dict(type='linear'), yaxis=yaxis)
    figure.update_layout(**layout_kwargs)
    return figure


@app.callback(_CALLBACK_OUTPUTS,
              [Input('interval-component', 'n_intervals'),
               Input('gpu-dropdown', 'value')])
def update_graphs(n, selected_gpu):
    """Refresh all figures and LED displays for the selected GPU.

    Args:
        n: Interval tick count; used as the x coordinate (a proxy for time).
        selected_gpu: GPU index from the dropdown, or ``None`` if cleared.

    Returns:
        A tuple with one value per entry of the callback's output list: the
        three figures, then Current/Min/Max/Avg strings for memory (GB),
        memory (%) and utilization (%).
    """
    # Get current GPU metrics.  The MB figure is part of get_gpu_metrics'
    # return shape but is not displayed: this section is labelled in GB.
    (allocated_memory, _allocated_memory_mb, free_memory,
     total_memory, gpu_utilization, gpu_name) = get_gpu_metrics(selected_gpu)

    if not total_memory:
        # A failed query reports zero total memory.  The placeholder count is
        # derived from the output list so the lengths always match.
        figure_count = len(_SECTIONS)
        led_count = len(_CALLBACK_OUTPUTS) - figure_count
        placeholder = _empty_figure()
        return tuple([placeholder] * figure_count + ["0.00"] * led_count)

    # Calculate percentage of allocated memory
    memory_percentage = (allocated_memory / total_memory) * 100

    # Update historical data for the selected GPU
    gpu_history = gpu_histories[selected_gpu]
    gpu_history["time_history"].append(n)  # Use interval count as a proxy for time
    gpu_history["memory_history"].append((allocated_memory, free_memory, total_memory))
    gpu_history["utilization_history"].append(gpu_utilization)
    gpu_history["memory_percentage_history"].append(memory_percentage)

    time_axis = list(gpu_history["time_history"])
    allocated_series = [sample[0] for sample in gpu_history["memory_history"]]
    free_series = [sample[1] for sample in gpu_history["memory_history"]]
    total_series = [sample[2] for sample in gpu_history["memory_history"]]
    percentage_series = list(gpu_history["memory_percentage_history"])
    utilization_series = list(gpu_history["utilization_history"])

    memory_figure = _line_figure(
        time_axis,
        [
            ("Allocated Memory (GB)", allocated_series, 'blue'),
            ("Free Memory (GB)", free_series, 'green'),
            ("Total Memory (GB)", total_series, 'gray'),
        ],
        title=f"GPU Memory Usage Over Time ({gpu_name})",
        yaxis_title="Memory (GB)",
        legend_title="Memory Type",
    )
    memory_percentage_figure = _line_figure(
        time_axis,
        [("Memory Usage (%)", percentage_series, 'purple')],
        title=f"GPU Memory Usage Percentage Over Time ({gpu_name})",
        yaxis_title="Memory Usage (%)",
        y_range=(0, 100),
    )
    utilization_figure = _line_figure(
        time_axis,
        [("GPU Utilization (%)", utilization_series, 'orange')],
        title=f"GPU Utilization Over Time ({gpu_name})",
        yaxis_title="Utilization (%)",
        y_range=(0, 100),
    )

    # Return figures and LED display values.  "Current" memory is reported in
    # GB so it agrees with the section heading and with Min/Max/Avg.
    return (
        memory_figure,
        memory_percentage_figure,
        utilization_figure,
        *_led_values(allocated_memory, allocated_series),
        *_led_values(memory_percentage, percentage_series),
        *_led_values(gpu_utilization, utilization_series),
    )


def _resolve_host(argv):
    """Return the bind address: ``10.119.2.<argv[1]>``, or localhost without an argument."""
    if len(argv) > 1:
        return f"{HOST_PREFIX}{argv[1]}"
    return FALLBACK_HOST


if __name__ == '__main__':
    # NOTE(review): debug=True is kept from the original; confirm it is
    # acceptable to run Dash in debug mode on a network-reachable address.
    app.run_server(host=_resolve_host(sys.argv), port=8050, debug=True)