Spaces:

noamkay
/

temp2

No application file

App Files Files Community

temp2 / vizualize_nn.py

noamkay

Upload folder using huggingface_hub

5621fe8 over 2 years ago

raw

history blame contribute delete

24.1 kB


	# !pip install gradio

	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from torch.utils.data import TensorDataset, DataLoader
	import torch.nn as nn
	import torch.optim as optim
	import torch
	# Visualize the simulated data
	import matplotlib.pyplot as plt
	import plotly.graph_objs as go
	import IPython
	import numpy as np
	from graphviz import Digraph
	import copy
	import plotly.graph_objs as go
	import torch
	import numpy as np
	import colorsys
	from functools import partial
	import gradio as gr # may requeire session restart
	import os
	import uuid
	from contextlib import contextmanager
	# Layout direction of the rendered network graph: 'h' for horizontal, 'v' for vertical.
	NETWORK_ORIENTAION = 'h'
	# Directory that receives the rendered PNG files.
	TEMP_DIR = "/content/temp"
	# exist_ok=True replaces the check-then-create pair, which could fail if the
	# directory appeared between the existence check and the makedirs call.
	os.makedirs(TEMP_DIR, exist_ok=True)

	"""## functions"""

	# @title generate data

	def simulate_clusters(noise=0.3,data_points=1000):
	    """Generate a four-cluster, XOR-style 2-D dataset as scaled train/test tensors.

	    Four Gaussian clusters are centred on (-1, -1), (-1, 1), (1, -1) and (1, 1),
	    each with covariance ``noise * I``. The two diagonal clusters, (-1, -1) and
	    (1, 1), get label 0; the other two get label 1.

	    Args:
	        noise: Scale of the identity covariance used for every cluster.
	        data_points: Total number of samples; must be divisible by 4.

	    Returns:
	        Tuple ``(X_train, y_train, X_test, y_test)`` of torch tensors; features
	        are float32 and standardised with statistics of the training split,
	        labels are int64. The split is 80/20.
	    """
	    assert data_points%4==0, 'Data points should be dived by 4'
	    per_cluster = data_points // 4

	    # Seed the global NumPy RNG so every call draws the same samples
	    np.random.seed(0)

	    centers = [(-1, -1), (-1, 1), (1, -1), (1, 1)]
	    blobs = []
	    for center in centers:
	        spread = np.eye(2) * noise
	        blobs.append(np.random.multivariate_normal(center, spread, per_cluster))
	    X = np.vstack(blobs)

	    # Cluster index of every row (0..3), then collapse to the binary label
	    cluster_ids = np.repeat(np.arange(len(centers)), per_cluster)
	    y_adjusted = np.where(np.isin(cluster_ids, (0, 3)), 0, 1)

	    X_train_adj, X_test_adj, y_train_adj, y_test_adj = train_test_split(
	        X, y_adjusted, test_size=0.2, random_state=42
	    )

	    # Fit the scaler on the training split only and reuse it for the test split
	    scaler_adj = StandardScaler()
	    X_train_scaled_adj = scaler_adj.fit_transform(X_train_adj)
	    X_test_scaled_adj = scaler_adj.transform(X_test_adj)

	    return (
	        torch.tensor(X_train_scaled_adj, dtype=torch.float32),
	        torch.tensor(y_train_adj, dtype=torch.long),
	        torch.tensor(X_test_scaled_adj, dtype=torch.float32),
	        torch.tensor(y_test_adj, dtype=torch.long),
	    )

	# @title plotting network with activation
	def get_color(activation, base_color=False):
	    """Map an activation level to a hex colour string.

	    The activation is clamped to [0, 1] before use. Values outside that range
	    would otherwise yield channel values below 0 or above 255 and therefore a
	    malformed colour string.

	    Args:
	        activation: Number or single-element tensor giving the activation level.
	        base_color: Optional ``'#RRGGBB'`` string. When given, the result
	            interpolates between white (activation 0) and this colour
	            (activation 1). When falsy, a blue ``'#0000FFAA'`` colour whose
	            alpha follows the activation is returned, or light gray for a
	            non-positive activation.

	    Returns:
	        A hex colour string.
	    """
	    level = min(1.0, max(0.0, float(activation)))

	    if base_color:
	        # Parse the three two-digit hex channels of '#RRGGBB'
	        channels = [int(base_color[start:start + 2], 16) for start in (1, 3, 5)]
	        # Blend each channel towards white as the activation drops
	        blended = [int(c + (255 - c) * (1 - level)) for c in channels]
	        return '#' + ''.join(f'{value:02x}' for value in blended)

	    if level > 0:
	        return f"#0000FF{int(level * 255):02X}"  # Blue with varying intensity
	    return "#E0E0E0"  # Light gray for inactive neurons


	def rd(activation):
	    """Format an activation as a node-label suffix.

	    Returns an empty string when ``activation`` equals 1; otherwise a newline
	    followed by the value rounded to two decimals. Values other than 1 must be
	    tensors, because ``torch.round`` is applied to them.
	    """
	    if activation != 1:
	        return "\n" + "{:.2f}".format(torch.round(activation, decimals=2).item())
	    return ''


	def softmax(x):
	    """Softmax over axis 0 of ``x``, skipped when any element equals 1.

	    If any element of ``x`` equals 1 the tensor is returned unchanged.
	    NOTE(review): this looks like a sentinel for the all-ones placeholder
	    activations; a real logit of exactly 1 also triggers it - confirm intent.

	    The maximum is subtracted before exponentiating so large logits do not
	    overflow to ``inf`` and turn the result into NaN; the shift cancels out
	    in the ratio.
	    """
	    if not all(x != 1):
	        return x
	    shift = torch.max(x) if x.numel() else 0
	    exps = torch.exp(x - shift)
	    return exps / torch.sum(exps, dim=0)
	def visualize_network_with_weights(model, activations=False, norm='net', decision_boundary_images=None, width=1, height=1):
	    """Draw the model's linear layers as a Graphviz graph.

	    Nodes are coloured by activation; edges are coloured by weight sign (blue
	    for >= 0, orange for negative) and their width follows the normalised
	    absolute weight.

	    Args:
	        model: Module whose ``nn.Linear`` children (in ``named_children()``
	            order) define the layers. The last linear child is treated as the
	            output layer, and its first two units are drawn as the two label
	            nodes.
	        activations: Mapping from layer name to per-unit activations. When
	            falsy, every unit gets the placeholder activation 1.
	        norm: ``'layer'`` normalises each layer's weights by that layer's
	            largest absolute weight; any other value normalises by the
	            largest absolute weight across all layers.
	        decision_boundary_images: Optional mapping from layer name to a list of
	            image paths; a hidden unit with an image is drawn as a box showing it.
	        width: Node width used for image nodes.
	        height: Node height used for image nodes.

	    Returns:
	        The populated ``Digraph``.

	    Raises:
	        ValueError: If the model has no ``nn.Linear`` children.
	    """
	    dot = Digraph()
	    if NETWORK_ORIENTAION=='h':
	        dot.attr(rankdir='LR')
	    pos_color = "blue"
	    neg_color = "orange"
	    # Colors for different layers: light green, light grey, light pink, light blue
	    input_color, hidden_color, output_color1, output_color2 = '#90EE90', '#D3D3D3', '#FFB6C1', '#ADD8E6'

	    # Extract weights for each linear layer and track the global max for normalization
	    layers_weights = {}
	    max_weight = 0
	    for name, layer in model.named_children():
	        if isinstance(layer, torch.nn.Linear):
	            layer_weight = layer.weight.cpu().data.numpy()
	            layers_weights[name] = layer_weight
	            max_weight = max(max_weight, np.abs(layer_weight).max())
	    if not layers_weights:
	        raise ValueError("The model has no nn.Linear layers to visualize.")

	    # Layer structure is derived from the model instead of being hard-coded,
	    # so node creation and edge creation always agree on the layers.
	    layer_names = list(layers_weights)
	    output_layer_name = layer_names[-1]
	    number_of_layer = len(layer_names)

	    # Initialize activations if not provided
	    if not activations:
	        activations = {layer: [1] * weight.shape[0] for layer, weight in layers_weights.items()}

	    def weight_scale(weight):
	        # Fall back to 1.0 so all-zero weights do not divide by zero
	        peak = np.abs(weight).max() if norm == 'layer' else max_weight
	        return peak if peak > 0 else 1.0

	    # Normalize weights for visualization purposes
	    layers_weights_norm = {layer: weight / weight_scale(weight)
	                           for layer, weight in layers_weights.items()}

	    def add_node_with_border(node_id, label, base_color, activation, image_path=None, shape='circle', border_color='black', border_width=1):
	        fill_color = get_color(activation, base_color)
	        if image_path:
	            dot.node(node_id, label, shape='box', style='filled', fillcolor=fill_color, color=border_color, penwidth=str(border_width),imagescale='both', width=str(width), height=str(height), image=image_path, fixedsize='true')
	        else:
	            dot.node(node_id, label, shape=shape, style='filled', fillcolor=fill_color, color=border_color, penwidth=str(border_width))

	    # Input nodes: one per input feature of the first linear layer
	    axis_names = ['X','Y']
	    for i in range(layers_weights[layer_names[0]].shape[1]):
	        add_node_with_border(f'h0_{i}' , f'X{i} - {axis_names[i]} Axis', input_color, 1.0) # Input nodes are always 'active'

	    # Hidden nodes: every linear layer except the last one
	    for layer_i, layer_name in enumerate(layer_names[:-1], start=1):
	        layer_images = decision_boundary_images[layer_name] if decision_boundary_images and layer_name in decision_boundary_images else []
	        for i, activation in enumerate(activations[layer_name]):
	            image_path = layer_images[i] if len(layer_images) > i else None
	            add_node_with_border(f'h{layer_i}_{i}', f'H{layer_i}_{i}{rd(activation)}', hidden_color, activation, image_path=image_path)

	    # Output nodes: the two output activations, passed through softmax
	    norm_output_activations = softmax(torch.tensor([activations[output_layer_name][0],activations[output_layer_name][1]]))
	    activation_label1,activation_label2 = norm_output_activations
	    add_node_with_border(f'h{number_of_layer}_0', f"Y0 - Label 0{rd(activation_label1)}", output_color1, activation_label1,shape='doublecircle')
	    add_node_with_border(f'h{number_of_layer}_1', f"Y1 - Label 1{rd(activation_label2)}", output_color2, activation_label2,shape='doublecircle')

	    # Adding edges between consecutive layers
	    prev_layer_size = layers_weights[layer_names[0]].shape[1] # Size of the input layer
	    prev_layer_name = 'h0'

	    for layer_idx, (layer_name, weight_matrix) in enumerate(layers_weights.items(), start=1):
	        current_layer_size = weight_matrix.shape[0]

	        for i in range(prev_layer_size):
	            for j in range(current_layer_size):
	                color = pos_color if weight_matrix[j, i] >= 0 else neg_color
	                dot.edge(f'{prev_layer_name}_{i}', f'h{layer_idx}_{j}', penwidth=str(abs(layers_weights_norm[layer_name][j, i]) * 5), color=color)

	        prev_layer_size = current_layer_size
	        prev_layer_name = f'h{layer_idx}'

	    return dot

	# @title Plots (learning curve and decision boundary)
	def plot_decision_boundary(model, X_train, y_train, X_test, y_test, show=True, epoch=''):
	    """Plot the model's predicted class regions with the train/test points on top.

	    Args:
	        model: Classifier with a ``device`` attribute; it is switched to eval mode.
	        X_train, y_train: Training features (two columns) and labels.
	        X_test, y_test: Test features (two columns) and labels.
	        show: When true, display the figure before returning it.
	        epoch: Text appended to the plot title.

	    Returns:
	        The Plotly figure.
	    """
	    model.eval()

	    # Plot extent: the combined data range padded by 1 on every side
	    x_min = min(X_train[:, 0].min(), X_test[:, 0].min()) - 1
	    x_max = max(X_train[:, 0].max(), X_test[:, 0].max()) + 1
	    y_min = min(X_train[:, 1].min(), X_test[:, 1].min()) - 1
	    y_max = max(X_train[:, 1].max(), X_test[:, 1].max()) + 1
	    h = 0.01

	    # Regular grid with spacing h, flattened into (x, y) rows for the model
	    x_ticks = np.arange(x_min, x_max, h)
	    y_ticks = np.arange(y_min, y_max, h)
	    xx, yy = np.meshgrid(x_ticks, y_ticks)
	    grid_tensor = torch.FloatTensor(np.c_[xx.ravel(), yy.ravel()])
	    with torch.no_grad():
	        predictions = model(grid_tensor.to(model.device)).argmax(1).to('cpu')
	    Z = predictions.numpy().reshape(xx.shape)

	    # Background: predicted class per grid cell
	    contour = go.Contour(
	        x=x_ticks,
	        y=y_ticks,
	        z=Z,
	        colorscale='RdYlBu',  # Light colors for background
	        showscale=False  # Hide the colorbar
	    )

	    def points_trace(points, labels, label_value, marker, name):
	        # One scatter trace holding the points of a single split and label
	        subset = points[labels == label_value]
	        return go.Scatter(x=subset[:, 0], y=subset[:, 1], mode='markers',
	                          marker=marker, name=name)

	    black_edge = dict(color='black', width=1)
	    scatters = [
	        points_trace(X_train, y_train, 0,
	                     dict(color='red', line=dict(black_edge)),
	                     'Train - Label 0'),
	        points_trace(X_train, y_train, 1,
	                     dict(color='green', line=dict(black_edge)),
	                     'Train - Label 1'),
	        points_trace(X_test, y_test, 0,
	                     dict(color='rgba(255, 200, 200, 1)', symbol='circle-open', line=dict(black_edge)),
	                     'Test - Label 0'),
	        points_trace(X_test, y_test, 1,
	                     dict(color='rgba(200, 255, 200, 1)', symbol='circle-open', line=dict(black_edge)),
	                     'Test - Label 1'),
	    ]

	    layout = go.Layout(
	        title='Decision Boundary ' + epoch,
	        xaxis=dict(title='Feature 1'),
	        yaxis=dict(title='Feature 2'),
	        showlegend=True
	    )
	    fig = go.Figure(data=[contour, *scatters], layout=layout)

	    if show:
	        fig.show()
	    return fig


	def generate_learning_curve(loss_hist, loss_val_hist, hidden_units, noise, epochs, lr,metric):
	    """Build a Plotly line chart of a training and a validation metric per epoch.

	    Args:
	        loss_hist: Per-epoch training values.
	        loss_val_hist: Per-epoch validation values.
	        hidden_units, noise, lr: Shown in the plot title.
	        epochs: Number of epochs; the x axis runs from 1 to ``epochs``.
	        metric: ``'loss'`` (case-insensitive) labels the curves "Loss";
	            any other value labels them "Accuracy".

	    Returns:
	        The Plotly figure.
	    """
	    with torch.no_grad():
	        metric = "Accuracy" if metric.lower() != 'loss' else 'Loss'

	        # One line per split, both over the same 1..epochs axis
	        series_by_split = (('Training', loss_hist), ('Validation', loss_val_hist))
	        data = [
	            go.Scatter(
	                x=list(range(1, epochs + 1)),
	                y=values,
	                mode='lines',
	                name=f'{split} {metric}'
	            )
	            for split, values in series_by_split
	        ]

	        layout = go.Layout(
	            title=f'Learning Curve - Hidden Units: {hidden_units}, Noise: {noise}, Learning Rate: {lr}',
	            xaxis=dict(title='Epochs'),
	            yaxis=dict(title=metric),
	        )

	        fig = go.Figure(data=data, layout=layout)
	    return fig

	def save_plot_as_image(fig, remove_axes=True, remove_title=True, remove_colorbar=True, transparent_background=True):
	    """
	    Saves a Matplotlib figure as a PNG in TEMP_DIR and returns the path to the image.

	    The figure is modified in place according to the flags before it is saved.

	    Args:
	        fig (matplotlib.figure.Figure): The Matplotlib figure to save.
	        remove_axes (bool): If True, hides both axes and the frame of every subplot.
	        remove_title (bool): If True, clears the figure title and hides subplot titles.
	        remove_colorbar (bool): If True, removes colorbars attached to images or
	            collections (e.g. contour sets) of every subplot.
	        transparent_background (bool): If True, saves the image with a transparent background.

	    Returns:
	        str: Path to the saved image file.

	    Raises:
	        ValueError: If ``fig`` is not a Matplotlib figure.
	    """
	    # Check if fig is a valid Matplotlib figure
	    if not isinstance(fig, plt.Figure):
	        raise ValueError("The provided object is not a Matplotlib figure.")

	    # Remove axes if requested
	    if remove_axes:
	        for ax in fig.axes:
	            ax.get_xaxis().set_visible(False)
	            ax.get_yaxis().set_visible(False)
	            ax.set_frame_on(False)

	    # Remove title and header if requested
	    if remove_title:
	        fig.suptitle("")
	        for ax in fig.axes:
	            ax.title.set_visible(False)

	    # Remove colorbar if requested. Both images and collections are inspected:
	    # the previous check required collections to exist but then looked only at
	    # images, so it missed colorbars in both the image-only and contour-only cases.
	    if remove_colorbar:
	        # Iterate over a snapshot because removing a colorbar deletes its axes
	        for ax in list(fig.axes):
	            for artist in list(ax.get_images()) + list(ax.collections):
	                colorbar = getattr(artist, 'colorbar', None)
	                if colorbar is not None:
	                    colorbar.remove()

	    # Set transparent background if requested
	    if transparent_background:
	        fig.patch.set_alpha(0)
	        for ax in fig.axes:
	            ax.patch.set_alpha(0)

	    # Generate a unique filename for the image
	    filename = f"plot_{uuid.uuid4()}.png"
	    file_path = os.path.join(TEMP_DIR, filename)

	    # Save the figure with a transparent background if requested
	    fig.savefig(file_path, bbox_inches='tight', pad_inches=0, transparent=transparent_background)

	    return file_path

	def plot_neuron_decision_boundaries(model, X, step=0.01):
	    """Create one filled-contour figure per unit of every linear layer.

	    A mesh covering the range of ``X`` (padded by 0.5 on each side) is pushed
	    through the model's children in ``named_children()`` order. After each
	    ``nn.Linear`` child (and after the last child, whatever its type) the
	    output of every unit is drawn, split at 0 when it changes sign over the mesh.

	    Args:
	        model: Module with a ``device`` attribute whose children can be applied
	            sequentially; it is switched to eval mode.
	        X: Array or tensor with two feature columns that defines the mesh extent.
	        step: Mesh spacing.

	    Returns:
	        Dict mapping child name to the list of Matplotlib figures of its units.
	    """
	    # Ensure X is a NumPy array
	    if isinstance(X, torch.Tensor):
	        X = X.cpu().numpy()
	    # The mesh spans the lowest to highest value on each axis, plus this margin
	    mesh_border_expansion = 0.5
	    x_min, x_max = X[:, 0].min() - mesh_border_expansion , X[:, 0].max() + mesh_border_expansion
	    y_min, y_max = X[:, 1].min() - mesh_border_expansion , X[:, 1].max() + mesh_border_expansion
	    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
	    mesh_inputs = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])

	    model.eval()
	    figures_dict = {}
	    layer_outputs = mesh_inputs
	    # Resolve the children once instead of rebuilding the list for every layer
	    children = list(model.named_children())
	    last_layer_name = children[-1][0] if children else None
	    with torch.no_grad():
	        for name, layer in children:
	            # Apply the layer
	            layer_outputs = layer(layer_outputs.to(model.device))

	            # Only linear layers and the last layer are plotted
	            if not (isinstance(layer, nn.Linear) or name == last_layer_name):
	                continue

	            # Convert to NumPy for plotting
	            outputs_np = layer_outputs.cpu().numpy()
	            for neuron_idx in range(outputs_np.shape[1]):
	                Z = outputs_np[:, neuron_idx].reshape(xx.shape)

	                Z_min, Z_max = Z.min(), Z.max()
	                if Z_min < 0 < Z_max:
	                    levels = [Z_min, 0, Z_max]
	                elif Z_min < Z_max:
	                    levels = [Z_min, Z_max]
	                else:
	                    # Constant output: contourf needs strictly increasing
	                    # levels, so widen the interval around the single value.
	                    levels = [float(Z_min) - 1.0, float(Z_max) + 1.0]

	                fig, ax = plt.subplots()
	                ax.contourf(xx, yy, Z, levels=levels, cmap=plt.cm.RdBu, alpha=0.8)
	                plt.show()
	                plt.close(fig)
	                figures_dict.setdefault(name, []).append(fig)

	    return figures_dict


	# plot_neuron_decision_boundaries( fc_model, X_train)

	# step=0.01
	# x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
	# y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
	# xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
	# mesh_inputs = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
	# mesh_inputs

	# @title network architecture and training

	# Global variables to hold model and data
	# Module-level state shared by training and plotting: init_net_and_train()
	# fills these in, and the image-generation functions read them.
	# (A `global` statement has no effect at module scope, so none is used here.)
	fc_model_hist, X_train, y_train, X_test, y_test = None, None, None, None, None

	class FCNet(nn.Module):
	    """Fully connected classifier: 2 inputs -> two ReLU hidden layers -> 2 outputs."""

	    def __init__(self,hidden_units,device):
	        """Create the layers.

	        Args:
	            hidden_units: Width of both hidden layers.
	            device: Device identifier stored on ``self.device``.
	        """
	        super(FCNet, self).__init__()
	        self.fc1 = nn.Linear(2, hidden_units) # Input layer with 2 features
	        # Each ReLU is registered as its own child module because the plotting
	        # functions walk model.named_children(); an activation applied only
	        # functionally would not appear there.
	        self.act_func1 = nn.ReLU()
	        self.fc2 = nn.Linear(hidden_units, hidden_units)
	        self.act_func2 = nn.ReLU()
	        self.fc3 = nn.Linear(hidden_units, 2) # Output layer with 2 neurons (for 2 classes)
	        self.device = device

	    def forward(self, x):
	        """Return the output-layer logits for ``x``."""
	        x = self.act_func1(self.fc1(x))
	        x = self.act_func2(self.fc2(x))
	        x = self.fc3(x)
	        return x

	    def forward_with_activation(self, x):
	        """Run a forward pass and also return the intermediate activations.

	        Returns:
	            Tuple ``(logits, activations)`` where ``activations`` maps
	            ``'inputs'`` to the input, ``'fc1'`` / ``'fc2'`` to the post-ReLU
	            hidden outputs and ``'fc3'`` to the logits.
	        """
	        inputs = x
	        x1 = self.act_func1(self.fc1(x))
	        x2 = self.act_func2(self.fc2(x1))
	        x3 = self.fc3(x2)
	        # Return the network output; the input was returned here before by mistake
	        return x3,{'inputs':inputs,'fc1':x1,'fc2':x2,'fc3':x3}

	    def to(self, device):
	        """Move the module to ``device``, record it on ``self.device`` and return self."""
	        super().to(device)
	        self.device = device
	        return self

	def init_net_and_train(hidden_units = 4,noise = 0.2,epochs = 30,data_points = 1000,lr=0.01,device='cpu',metric='acc'):
	    """Simulate the dataset, train a fresh FCNet and return its learning curve.

	    Side effects: overwrites the module-level ``X_train``, ``y_train``,
	    ``X_test``, ``y_test`` with the new dataset and ``fc_model_hist`` with one
	    CPU copy of the model per epoch (taken after that epoch's training pass).

	    Args:
	        hidden_units: Width of the hidden layers.
	        noise: Cluster noise passed to ``simulate_clusters``.
	        epochs: Number of training epochs.
	        data_points: Total number of simulated samples.
	        lr: Adam learning rate.
	        device: Device to train on.
	        metric: ``'loss'`` (case-insensitive) plots the loss; any other value
	            plots accuracy. This matches the labelling rule used by
	            ``generate_learning_curve`` so the label and the data always agree.

	    Returns:
	        The figure produced by ``generate_learning_curve``.
	    """
	    global fc_model_hist, X_train, y_train, X_test, y_test
	    # Simulate the dataset
	    X_train,y_train,X_test,y_test = simulate_clusters(noise,data_points)

	    # Create TensorDataset and DataLoader
	    train_loader_adj = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
	    test_loader_adj = DataLoader(TensorDataset(X_test, y_test), batch_size=64, shuffle=True)

	    # Initialize the fully connected network, its loss and its optimizer
	    fc_model = FCNet(hidden_units,device=device)
	    fc_model.to(device)
	    fc_criterion = nn.CrossEntropyLoss()
	    fc_optimizer = optim.Adam(fc_model.parameters(), lr=lr)

	    fc_model_hist = []
	    loss_hist, loss_val_hist = [], []
	    acc_hist, acc_val_hist = [], []

	    device = fc_model.device

	    for _ in range(epochs):
	        fc_model.train() # Set model to training mode
	        train_loss, train_accuracy = _run_epoch(fc_model, train_loader_adj, fc_criterion, device, optimizer=fc_optimizer)
	        loss_hist.append(train_loss)
	        acc_hist.append(train_accuracy)

	        fc_model.eval() # Set model to evaluation mode for validation
	        # Snapshot the model after this epoch's updates for later visualisation
	        fc_model_hist.append(copy.deepcopy(fc_model).to('cpu'))
	        test_loss, test_accuracy = _run_epoch(fc_model, test_loader_adj, fc_criterion, device)
	        loss_val_hist.append(test_loss)
	        acc_val_hist.append(test_accuracy)

	    # Pick the series with the same rule generate_learning_curve uses for its label
	    if metric.lower() == 'loss':
	        reported_metric_train,reported_metric_val = loss_hist,loss_val_hist
	    else:
	        reported_metric_train,reported_metric_val = acc_hist,acc_val_hist
	    return generate_learning_curve(reported_metric_train,reported_metric_val,hidden_units,noise,epochs,lr,metric)


	def _run_epoch(model, loader, criterion, device, optimizer=None):
	    """Run one pass over ``loader``; return (mean loss per sample, accuracy).

	    With an ``optimizer`` the pass trains the model (gradients enabled, one
	    step per batch); without one it only evaluates, under ``torch.no_grad()``.
	    """
	    training = optimizer is not None
	    total_loss = 0
	    correct = 0
	    seen = 0

	    with (torch.enable_grad() if training else torch.no_grad()):
	        for inputs, labels in loader:
	            inputs, labels = inputs.to(device), labels.to(device)
	            if training:
	                optimizer.zero_grad()
	            outputs = model(inputs)
	            loss = criterion(outputs, labels)
	            if training:
	                loss.backward()
	                optimizer.step()

	            # Weight the batch-mean loss by batch size so the epoch value is a per-sample mean
	            total_loss += loss.item() * inputs.size(0)
	            _, predicted = torch.max(outputs.detach(), 1)
	            seen += labels.size(0)
	            correct += (predicted == labels).sum().item()

	    return total_loss / seen, correct / seen

	# @title functions for retriving app images
	def get_network_with_inputs(epoch, input_x, input_y,output_type = "HTML"):
	    """Render the network of a given epoch with activations for one input point.

	    Args:
	        epoch: 1-based epoch number. It is converted to ``int`` and clamped to
	            the range of recorded epochs, so a value <= 0 selects the first
	            snapshot instead of wrapping around to a negative index.
	        input_x: First input feature.
	        input_y: Second input feature.
	        output_type: ``'PNG'`` renders a PNG file and returns its path; any
	            other value returns an HTML string embedding the SVG.

	    Returns:
	        A PNG file path or an HTML string, depending on ``output_type``.
	    """
	    epoch = max(1, min(int(epoch), len(fc_model_hist)))
	    with torch.no_grad():
	        cur_model = fc_model_hist[epoch - 1]
	        point = torch.tensor([input_x, input_y], dtype=torch.float32,device=cur_model.device)
	        _, activations = cur_model.forward_with_activation(point)
	        network_dot = visualize_network_with_weights(cur_model, activations=activations)
	    if output_type=='PNG':
	        cur_path = f'network_with_weights_activation_{epoch}'
	        network_dot.render(cur_path, format='png', cleanup=True)
	        return cur_path + ".png"
	    svg_content = network_dot.pipe(format='svg').decode('utf-8')
	    # Create HTML content embedding the SVG
	    html_content = f'<div style="width:100%; height:100%;">{svg_content}</div>'
	    return html_content


	def get_plots_as_png(des_list):
	    """Save every figure in ``des_list`` as an image and return the list of paths."""
	    return [save_plot_as_image(plot) for plot in des_list]


	# When true, generate_images returns the network as embedded SVG/HTML instead of a PNG path
	as_HTML=False

	def generate_images(epoch,net_with_unit_decisions=True):
	    """Produce the decision-boundary figure and the network drawing for an epoch.

	    Args:
	        epoch: 1-based epoch number. It is converted to ``int`` and clamped to
	            the range of recorded epochs, so a value <= 0 selects the first
	            snapshot instead of wrapping around to a negative index.
	        net_with_unit_decisions: When true, hidden units are drawn with their
	            individual decision-boundary images.

	    Returns:
	        Tuple ``(fig, network)``: the Plotly decision-boundary figure and either
	        an HTML string (when ``as_HTML`` is true) or the path of a PNG written
	        to TEMP_DIR.
	    """
	    epoch = max(1, min(int(epoch), len(fc_model_hist)))
	    snapshot = fc_model_hist[epoch-1]
	    fig = plot_decision_boundary(snapshot, X_train, y_train, X_test, y_test, show=False,epoch=f'Epoch:{epoch}')

	    if not net_with_unit_decisions:
	        network_dot = visualize_network_with_weights(snapshot)
	    else:
	        decision_plots = plot_neuron_decision_boundaries(snapshot, X_train)
	        decision_boundary_images = {k:get_plots_as_png(decision_plots[k]) for k in decision_plots}
	        network_dot = visualize_network_with_weights(snapshot, activations=False, decision_boundary_images=decision_boundary_images)

	    if as_HTML:
	        svg_content = network_dot.pipe(format='svg').decode('utf-8')
	        network_proccessed = f'<div style="width:100%; height:100%;">{svg_content}</div>'
	    else:
	        cur_path = f'{TEMP_DIR}/network_with_weights_activation_{epoch}'
	        network_dot.render(cur_path, format='png', cleanup=True)
	        network_proccessed = cur_path+".png"

	    return fig, network_proccessed

	@contextmanager
	def dummy_context():
	    """Context manager that does nothing on entry or exit and yields ``None``."""
	    yield None