Vaani-Audio2Img-LDM / tools /_2_stats.py

LDM-train-pass, checking results

87ef7b5 verified 9 months ago

12.1 kB

	# pip install dash plotly nvidia-ml-py dash-daq

	import sys
	import dash
	from dash import dcc, html
	from dash.dependencies import Input, Output
	import plotly.graph_objs as go
	import nvidia_smi
	from collections import defaultdict, deque
	import dash_daq as daq # For LED displays

	# Rolling window size: each per-GPU series keeps at most this many samples
	max_history_length = 100


	def _new_gpu_history():
	    """Build the empty, bounded series tracked for a single GPU."""
	    series_names = (
	        "time_history",
	        "memory_history",
	        "utilization_history",
	        "memory_percentage_history",  # memory usage expressed as a percentage
	    )
	    return {name: deque(maxlen=max_history_length) for name in series_names}


	# Histories are created lazily the first time a GPU key is looked up
	gpu_histories = defaultdict(_new_gpu_history)

	# Plotly template name applied to the figures
	plot_template = "ggplot2"

	# Function to get GPU memory usage, utilization, and name
	def get_gpu_metrics(device_id):
	    """Query NVML for the memory and utilization figures of one GPU.

	    Args:
	        device_id: Index passed to ``nvmlDeviceGetHandleByIndex``.

	    Returns:
	        A tuple ``(allocated_gb, allocated_mb, free_gb, total_gb,
	        utilization_percent, gpu_name)``. When NVML raises an error the
	        error is printed and the defaults are returned instead: every
	        numeric field is ``0`` and the name is ``"Unknown GPU"``.
	    """
	    bytes_per_mb = 1024 ** 2
	    bytes_per_gb = 1024 ** 3

	    # Defaults for the failure path. Every returned name needs one here,
	    # otherwise an NVML error surfaces as UnboundLocalError at the return.
	    allocated_memory = 0
	    allocated_memory_MB = 0
	    free_memory = 0
	    total_memory = 0
	    gpu_utilization = 0
	    gpu_name = "Unknown GPU"
	    try:
	        nvidia_smi.nvmlInit()
	        try:
	            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_id)
	            mem_info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
	            util_info = nvidia_smi.nvmlDeviceGetUtilizationRates(handle)
	            raw_name = nvidia_smi.nvmlDeviceGetName(handle)
	        finally:
	            # Pair every successful init with a shutdown, even if a query fails
	            nvidia_smi.nvmlShutdown()

	        used_bytes = mem_info.total - mem_info.free
	        total_memory = mem_info.total / bytes_per_gb  # GB
	        free_memory = mem_info.free / bytes_per_gb  # GB
	        allocated_memory = used_bytes / bytes_per_gb  # GB
	        allocated_memory_MB = used_bytes / bytes_per_mb  # MB

	        gpu_utilization = util_info.gpu  # GPU utilization in percent

	        # Some NVML bindings return the device name as bytes; normalise to str
	        # so it does not render as "b'...'" when interpolated into titles.
	        if isinstance(raw_name, bytes):
	            raw_name = raw_name.decode("utf-8", errors="replace")
	        gpu_name = raw_name
	    except nvidia_smi.NVMLError as error:
	        print(f"Error fetching GPU metrics: {error}")
	    return allocated_memory, allocated_memory_MB, free_memory, total_memory, gpu_utilization, gpu_name

	# Get list of available GPUs
	def get_gpu_list():
	    """Enumerate the GPUs visible to NVML as dropdown options.

	    Returns:
	        A list of ``{"label": "GPU <i> (<name>)", "value": <i>}`` dicts, one
	        per device index. When NVML raises an error the error is printed and
	        whatever was collected so far (possibly an empty list) is returned.
	    """
	    gpu_list = []
	    try:
	        nvidia_smi.nvmlInit()
	        try:
	            for i in range(nvidia_smi.nvmlDeviceGetCount()):
	                handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
	                gpu_name = nvidia_smi.nvmlDeviceGetName(handle)
	                # Some NVML bindings return bytes; avoid "b'...'" in the label
	                if isinstance(gpu_name, bytes):
	                    gpu_name = gpu_name.decode("utf-8", errors="replace")
	                gpu_list.append({"label": f"GPU {i} ({gpu_name})", "value": i})
	        finally:
	            # Pair every successful init with a shutdown, even if a query fails
	            nvidia_smi.nvmlShutdown()
	    except nvidia_smi.NVMLError as error:
	        print(f"Error fetching GPU list: {error}")
	    return gpu_list

	# Dash app setup
	app = dash.Dash(__name__)

	_LED_COLOR = '#FF5E5E'
	# (visible label, id suffix) for the four LED readouts of every metric
	_STAT_FIELDS = (
	    ("Current:", "current"),
	    ("Min:", "min"),
	    ("Max:", "max"),
	    ("Avg:", "avg"),
	)


	def _stat_cell(label, led_id, is_last):
	    """Return one labelled LED readout; all but the last get a right margin."""
	    cell_style = {'display': 'inline-block'}
	    if not is_last:
	        cell_style['margin-right'] = '20px'
	    return html.Div([
	        html.Label(label),
	        daq.LEDDisplay(id=led_id, value="0.00", size=20, color=_LED_COLOR)
	    ], style=cell_style)


	def _metric_section(title, id_prefix, graph_id):
	    """Return a heading, a row of four LED readouts and a graph for one metric."""
	    last_index = len(_STAT_FIELDS) - 1
	    cells = [
	        _stat_cell(label, f"{id_prefix}-{suffix}", index == last_index)
	        for index, (label, suffix) in enumerate(_STAT_FIELDS)
	    ]
	    return html.Div([
	        html.H3(title),
	        html.Div(cells, style={'display': 'flex', 'margin-bottom': '20px'}),
	        dcc.Graph(id=graph_id)
	    ])


	_gpu_selector = html.Div([
	    html.Label("Select GPU:"),
	    dcc.Dropdown(
	        id='gpu-dropdown',
	        options=get_gpu_list(),
	        value=0  # start on device index 0
	    )
	])

	app.layout = html.Div([
	    html.H1("Real-Time GPU Metrics"),
	    html.H4("Ashish"),
	    _gpu_selector,
	    _metric_section("GPU Memory Usage (GB)", 'memory', 'gpu-memory-graph'),
	    _metric_section("GPU Memory Usage (%)", 'memory-percentage', 'gpu-memory-percentage-graph'),
	    _metric_section("GPU Utilization (%)", 'utilization', 'gpu-utilization-graph'),
	    dcc.Interval(
	        id='interval-component',
	        interval=1000,  # milliseconds, i.e. one tick per second
	        n_intervals=0
	    )
	])

	def _calculate_stats(data):
	    """Return ``(min, max, mean)`` of *data*, or ``(0, 0, 0)`` if it is empty."""
	    if not data:
	        return 0, 0, 0
	    return min(data), max(data), sum(data) / len(data)


	def _no_gpu_response():
	    """Build the callback result used when no GPU metrics are available.

	    The tuple holds one value per declared Output: three placeholder
	    figures followed by twelve zeroed LED readings (15 values in total).
	    """
	    empty_figure = go.Figure(
	        layout=dict(
	            title="No GPU Available",
	            xaxis=dict(visible=False),
	            yaxis=dict(visible=False),
	            annotations=[dict(
	                text="No GPU detected!",
	                xref="paper",
	                yref="paper",
	                showarrow=False,
	                font=dict(size=20)
	            )]
	        )
	    )
	    return (empty_figure,) * 3 + ("0.00",) * 12


	def _line_trace(x, y, name, color):
	    """Return a line-mode Scatter trace with the given data, name and color."""
	    return go.Scatter(x=x, y=y, name=name, mode='lines', line=dict(color=color))


	@app.callback(
	    [Output('gpu-memory-graph', 'figure'),
	     Output('gpu-memory-percentage-graph', 'figure'),
	     Output('gpu-utilization-graph', 'figure'),
	     Output('memory-current', 'value'),
	     Output('memory-min', 'value'),
	     Output('memory-max', 'value'),
	     Output('memory-avg', 'value'),
	     Output('memory-percentage-current', 'value'),
	     Output('memory-percentage-min', 'value'),
	     Output('memory-percentage-max', 'value'),
	     Output('memory-percentage-avg', 'value'),
	     Output('utilization-current', 'value'),
	     Output('utilization-min', 'value'),
	     Output('utilization-max', 'value'),
	     Output('utilization-avg', 'value')],
	    [Input('interval-component', 'n_intervals'),
	     Input('gpu-dropdown', 'value')]
	)
	def update_graphs(n, selected_gpu):
	    """Refresh the three graphs and twelve LED readouts for the selected GPU.

	    Args:
	        n: Tick count of the interval component; used as the x value of the
	            newly recorded sample.
	        selected_gpu: Value of the GPU dropdown, or ``None`` when it has been
	            cleared.

	    Returns:
	        A 15-tuple matching the declared Outputs: memory figure, memory
	        percentage figure, utilization figure, then current/min/max/avg
	        strings (two decimals) for memory in GB, memory percentage and
	        utilization. A placeholder response is returned when no GPU is
	        selected or no metrics could be read.
	    """
	    # A cleared dropdown yields None, which is not a usable device index
	    if selected_gpu is None:
	        return _no_gpu_response()

	    # The MB figure is not needed here: every memory readout is in GB
	    (allocated_memory, _allocated_memory_mb, free_memory,
	     total_memory, gpu_utilization, gpu_name) = get_gpu_metrics(selected_gpu)

	    # get_gpu_metrics reports a total of 0 when the query failed. Total is
	    # used instead of allocated so an idle GPU is not mistaken for a failure.
	    if not total_memory:
	        return _no_gpu_response()

	    memory_percentage = (allocated_memory / total_memory) * 100

	    # Record the new sample in the bounded history of the selected GPU
	    gpu_history = gpu_histories[selected_gpu]
	    gpu_history["time_history"].append(n)  # interval count as a proxy for time
	    gpu_history["memory_history"].append((allocated_memory, free_memory, total_memory))
	    gpu_history["utilization_history"].append(gpu_utilization)
	    gpu_history["memory_percentage_history"].append(memory_percentage)

	    times = list(gpu_history["time_history"])
	    memory_samples = list(gpu_history["memory_history"])
	    allocated_memory_history = [sample[0] for sample in memory_samples]
	    free_memory_history = [sample[1] for sample in memory_samples]
	    total_memory_history = [sample[2] for sample in memory_samples]
	    percentage_history = list(gpu_history["memory_percentage_history"])
	    utilization_history = list(gpu_history["utilization_history"])

	    min_allocated, max_allocated, avg_allocated = _calculate_stats(allocated_memory_history)
	    min_percentage, max_percentage, avg_percentage = _calculate_stats(percentage_history)
	    min_utilization, max_utilization, avg_utilization = _calculate_stats(utilization_history)

	    # Memory usage (GB): allocated, free and total over time
	    memory_figure = go.Figure()
	    memory_figure.add_trace(_line_trace(times, allocated_memory_history, "Allocated Memory (GB)", 'blue'))
	    memory_figure.add_trace(_line_trace(times, free_memory_history, "Free Memory (GB)", 'green'))
	    memory_figure.add_trace(_line_trace(times, total_memory_history, "Total Memory (GB)", 'gray'))
	    memory_figure.update_layout(
	        title=f"GPU Memory Usage Over Time ({gpu_name})",
	        xaxis_title="Time (s)",
	        yaxis_title="Memory (GB)",
	        legend_title="Memory Type",
	        template=plot_template,
	        xaxis=dict(type='linear'),
	        yaxis=dict(type='linear')
	    )

	    # Memory usage as a percentage of total memory
	    memory_percentage_figure = go.Figure()
	    memory_percentage_figure.add_trace(_line_trace(times, percentage_history, "Memory Usage (%)", 'purple'))
	    memory_percentage_figure.update_layout(
	        title=f"GPU Memory Usage Percentage Over Time ({gpu_name})",
	        xaxis_title="Time (s)",
	        yaxis_title="Memory Usage (%)",
	        template=plot_template,
	        xaxis=dict(type='linear'),
	        yaxis=dict(type='linear', range=[0, 100])
	    )

	    # GPU utilization percentage
	    utilization_figure = go.Figure()
	    utilization_figure.add_trace(_line_trace(times, utilization_history, "GPU Utilization (%)", 'orange'))
	    utilization_figure.update_layout(
	        title=f"GPU Utilization Over Time ({gpu_name})",
	        xaxis_title="Time (s)",
	        yaxis_title="Utilization (%)",
	        template=plot_template,
	        xaxis=dict(type='linear'),
	        yaxis=dict(type='linear', range=[0, 100])
	    )

	    return (
	        memory_figure,
	        memory_percentage_figure,
	        utilization_figure,
	        # Current memory is shown in GB so it agrees with min/max/avg
	        f"{allocated_memory:.2f}",
	        f"{min_allocated:.2f}",
	        f"{max_allocated:.2f}",
	        f"{avg_allocated:.2f}",
	        f"{memory_percentage:.2f}",
	        f"{min_percentage:.2f}",
	        f"{max_percentage:.2f}",
	        f"{avg_percentage:.2f}",
	        f"{gpu_utilization:.2f}",
	        f"{min_utilization:.2f}",
	        f"{max_utilization:.2f}",
	        f"{avg_utilization:.2f}"
	    )

	if __name__ == '__main__':
	    # The first command-line argument is the last octet of the 10.119.2.x
	    # address to bind; fail with a usage message rather than an IndexError.
	    if len(sys.argv) < 2:
	        sys.exit(f"usage: {sys.argv[0]} <last octet of the 10.119.2.x address to bind>")
	    host = f'10.119.2.{sys.argv[1]}'

	    # Prefer app.run where it exists (run_server was removed in Dash 3) and
	    # fall back to run_server on older releases that lack it.
	    run = getattr(app, 'run', None) or app.run_server

	    # NOTE(review): debug=True turns on the development tooling while the
	    # server is bound to a network address - confirm this is intended.
	    run(host=host, port=8050, debug=True)