Upload 16 files

93783dd verified 19 days ago

15.1 kB

	"""
	GPT-300M 3D Neural Network Visualization
	==========================================
	A 3D node-and-connection neural network diagram with depth,
	perspective, and accurate parameter counts.
	"""

	import matplotlib
	matplotlib.use("Agg")

	import matplotlib.pyplot as plt
	from mpl_toolkits.mplot3d import Axes3D
	from mpl_toolkits.mplot3d.art3d import Line3DCollection
	import numpy as np

	# ═══════════════════════════════════════════════════════════════════════
	# ACCURATE GPT-300M PARAMETERS
	# ═══════════════════════════════════════════════════════════════════════

	# Model hyper-parameters. Everything below is derived from these values.
	VOCAB = 32_000
	D = 1_024
	HEADS = 16
	HEAD_D = 64  # HEADS * HEAD_D == D
	D_FF = 4_096
	N_LAYERS = 24

	# Parameter counts. Only weight matrices and norm vectors are counted;
	# no bias terms appear in any of the formulas.
	embed_p = VOCAB * D  # 32,768,000
	attn_p = 4 * D * D  # 4,194,304 per layer (Q, K, V and output projections)
	ffn_p = 2 * D * D_FF  # 8,388,608 per layer (up and down projections)
	norm_p = 2 * D  # 2,048 per layer (two norm weight vectors)
	layer_p = attn_p + ffn_p + norm_p  # 12,584,960 per layer
	all_layers_p = layer_p * N_LAYERS  # 302,039,040
	final_norm_p = D  # 1,024
	TOTAL = embed_p + all_layers_p + final_norm_p  # 334,808,064

	# Layer definitions: (name, num_display_nodes, actual_neurons, params, color_hex)
	#
	# The params column must add up to TOTAL, because the diagram prints a
	# running sum of it next to every row. Layer 1 has no norm row of its own,
	# so its two norm vectors are counted on its last row.
	LAYERS = [
	    ("Input Tokens", 10, VOCAB, 0, "#4CAF50"),
	    ("Token Embedding", 12, D, embed_p, "#2196F3"),
	    ("RoPE Positions", 12, D, 0, "#00BCD4"),
	    ("Layer 1: Attention QKV", 14, D, attn_p * 3 // 4, "#FF9800"),
	    ("Layer 1: Attention Out", 12, D, attn_p * 1 // 4, "#FF9800"),
	    ("Layer 1: FFN Up (GELU)", 16, D_FF, ffn_p // 2, "#8BC34A"),
	    ("Layer 1: FFN Down + Norms", 12, D, ffn_p // 2 + norm_p, "#8BC34A"),
	    ("Layers 2–23 (×22)", 14, D, layer_p * 22, "#9C27B0"),
	    ("Layer 24: Attention", 14, D, attn_p, "#FF5722"),
	    ("Layer 24: FFN", 16, D_FF, ffn_p, "#009688"),
	    ("Layer 24: Norm + Out", 12, D, norm_p + final_norm_p, "#E91E63"),
	    # The output head reuses the embedding matrix, so it adds no parameters.
	    ("LM Head (weight-tied)", 12, VOCAB, 0, "#F44336"),
	    ("Output Probabilities", 1, VOCAB, 0, "#FF1744"),
	]


	def hex_to_rgb(h):
	    """Convert a hex color string to an ``(r, g, b)`` tuple of floats in [0, 1].

	    Accepted forms, with or without the leading ``#``:

	    * ``RRGGBB`` and ``RRGGBBAA`` (the alpha pair is ignored)
	    * the shorthand ``RGB`` and ``RGBA``, where every digit is doubled

	    Args:
	        h: Hex color string such as ``"#4CAF50"`` or ``"fff"``.

	    Returns:
	        Tuple of three floats, each channel value divided by 255.

	    Raises:
	        ValueError: If the string does not have 3, 4, 6 or 8 digits, or
	            contains a pair that is not valid hexadecimal.
	    """
	    digits = h.lstrip("#")

	    # Expand shorthand: "abc" -> "aabbcc".
	    if len(digits) in (3, 4):
	        digits = "".join(ch * 2 for ch in digits)

	    # Reject odd lengths up front; slicing them would otherwise produce a
	    # truncated last channel and a silently wrong color.
	    if len(digits) not in (6, 8):
	        raise ValueError(f"expected 3, 4, 6 or 8 hex digits, got {h!r}")

	    return tuple(int(digits[i:i + 2], 16) / 255.0 for i in (0, 2, 4))


	def generate_3d_network(save_path="neural_network_3d.png", elev=22, azim=-65):
	    """Render the whole-model 3D diagram described by ``LAYERS`` to an image.

	    One row of nodes is drawn per ``LAYERS`` entry, spaced along the y axis
	    and joined to the previous row by a sampled set of connection lines.
	    Each row is labelled with its name, its parameter count, the running
	    parameter total and a marker for the neurons that are not drawn.
	    A title, a parameter-breakdown box and a color legend are added in
	    figure coordinates.

	    Args:
	        save_path: Output image path passed to ``Figure.savefig``.
	        elev: Camera elevation passed to ``Axes3D.view_init``.
	        azim: Camera azimuth passed to ``Axes3D.view_init``.

	    Returns:
	        None. The image is written to ``save_path`` and the path is printed.
	    """
	    fig = plt.figure(figsize=(28, 28), facecolor="#0a0e17")
	    try:
	        _draw_network(fig, elev, azim)
	        fig.savefig(save_path, dpi=200, bbox_inches="tight",
	                    facecolor="#0a0e17", edgecolor="none")
	        print(f"Saved: {save_path}")
	    finally:
	        # Close this specific figure even when drawing or saving fails, so
	        # repeated calls do not pile up open figures.
	        plt.close(fig)


	def _draw_network(fig, elev, azim):
	    """Draw all ``LAYERS`` rows, the title, summary box and legend onto ``fig``."""
	    background = "#0a0e17"

	    # computed_zorder=False makes matplotlib honor the explicit zorder
	    # values given below instead of sorting artists by depth.
	    ax = fig.add_subplot(111, projection="3d", computed_zorder=False)

	    # Dark theme for 3D axes
	    ax.set_facecolor(background)
	    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
	        axis.pane.fill = False
	        axis.pane.set_edgecolor(background)
	    ax.grid(False)
	    ax.set_axis_off()

	    ax.view_init(elev=elev, azim=azim)

	    n_layers = len(LAYERS)
	    y_positions = np.linspace(0, n_layers * 4.0, n_layers)  # depth (layer position)

	    all_positions = []  # one (xs, ys, zs) triple of coordinate arrays per row
	    running_params = 0

	    for i, (name, n_nodes, actual, params, color_hex) in enumerate(LAYERS):
	        y = y_positions[i]
	        running_params += params

	        rgb = hex_to_rgb(color_hex)

	        if n_nodes == 1:
	            # A single node sits at the center of its row.
	            xs = np.array([0.0])
	            zs = np.array([0.0])
	        else:
	            # Spread nodes along x
	            spread = min(n_nodes * 0.5, 7.0)
	            xs = np.linspace(-spread, spread, n_nodes)
	            # Slight arc for 3D depth perception
	            zs = -0.1 * (xs ** 2)

	        ys = np.full_like(xs, y)
	        all_positions.append((xs, ys, zs))

	        # ── Draw connections to previous layer ──────────────────
	        if i > 0:
	            prev_xs, prev_ys, prev_zs = all_positions[i - 1]

	            # Sample connections to avoid clutter: only every step-th node
	            # of each row takes part.
	            n_prev = len(prev_xs)
	            n_curr = len(xs)
	            step_p = max(1, n_prev // 8)
	            step_c = max(1, n_curr // 8)

	            segments = [
	                [
	                    (prev_xs[pi], prev_ys[pi], prev_zs[pi]),
	                    (xs[ci], ys[ci], zs[ci]),
	                ]
	                for pi in range(0, n_prev, step_p)
	                for ci in range(0, n_curr, step_c)
	            ]

	            if segments:
	                # Lines take the color of the row they lead into.
	                line_colors = [(*rgb, 0.18)] * len(segments)
	                ax.add_collection3d(
	                    Line3DCollection(segments, colors=line_colors, linewidths=0.7)
	                )

	        # ── Draw nodes ──────────────────────────────────────────
	        node_size = 200 if n_nodes > 12 else 280
	        if n_nodes == 1:
	            node_size = 600

	        ax.scatter(
	            xs, ys, zs,
	            c=[color_hex], s=node_size,
	            alpha=0.95, edgecolors="white", linewidths=0.5,
	            depthshade=True, zorder=5,
	        )

	        # ── Glow effect (larger transparent scatter behind) ─────
	        ax.scatter(
	            xs, ys, zs,
	            c=[color_hex], s=node_size * 3,
	            alpha=0.08, edgecolors="none",
	            depthshade=True, zorder=4,
	        )

	        # ── Labels ──────────────────────────────────────────────
	        label_x = xs[-1] + 1.8 if n_nodes > 1 else 2.0
	        ax.text(
	            label_x, y, 0,
	            name,
	            fontsize=9.5, fontweight="bold",
	            color="#E6EDF3", fontfamily="monospace",
	            zorder=10,
	        )

	        # Param count
	        if params > 0:
	            if params >= 1_000_000:
	                ptxt = f"{params/1e6:.1f}M params"
	            else:
	                ptxt = f"{params:,} params"
	            ax.text(
	                label_x, y, -1.0,
	                ptxt,
	                fontsize=8, color=color_hex,
	                fontfamily="monospace", fontweight="bold",
	                zorder=10,
	            )

	        # Running total; also shown on rows that add no parameters
	        # themselves, once the total is non-zero.
	        if running_params > 0:
	            ax.text(
	                label_x, y, -1.8,
	                f"Σ {running_params/1e6:.1f}M",
	                fontsize=6, color="#8B949E",
	                fontfamily="monospace",
	                zorder=10,
	            )

	        # Overflow indicator: how many real neurons are not drawn
	        if actual > n_nodes and n_nodes > 1:
	            ax.text(
	                xs[-1] + 0.5, y, zs[-1],
	                f"(+{actual - n_nodes:,})",
	                fontsize=6, color="#8B949E",
	                fontfamily="monospace",
	                zorder=10,
	            )

	    # ── Title ──────────────────────────────────────────────────────
	    ax.text2D(
	        0.5, 0.96,
	        "GPT-300M • 3D Neural Network Architecture",
	        transform=fig.transFigure,
	        fontsize=22, fontweight="bold", color="#E6EDF3",
	        ha="center", fontfamily="monospace",
	    )
	    ax.text2D(
	        0.5, 0.94,
	        f"{TOTAL:,} parameters | {N_LAYERS} layers | {HEADS} heads | d_model={D} | d_ff={D_FF}",
	        transform=fig.transFigure,
	        fontsize=10, color="#8B949E",
	        ha="center", fontfamily="monospace",
	    )

	    # ── Parameter summary ──────────────────────────────────────────
	    attn_total = attn_p * N_LAYERS
	    ffn_total = ffn_p * N_LAYERS
	    norm_total = norm_p * N_LAYERS + final_norm_p
	    summary = (
	        "Parameter Breakdown:\n"
	        f" Embedding: {embed_p/1e6:>7.1f}M ({embed_p/TOTAL*100:.1f}%)\n"
	        f" Attention ×{N_LAYERS}: {attn_total/1e6:>7.1f}M ({attn_total/TOTAL*100:.1f}%)\n"
	        f" FFN ×{N_LAYERS}: {ffn_total/1e6:>7.1f}M ({ffn_total/TOTAL*100:.1f}%)\n"
	        f" Norms: {norm_total/1e6:>7.3f}M ({norm_total/TOTAL*100:.1f}%)\n"
	        " LM Head: tied (0 extra)\n"
	        " ───────────────────────\n"
	        f" TOTAL: {TOTAL/1e6:>7.1f}M"
	    )
	    ax.text2D(
	        0.02, 0.06, summary,
	        transform=fig.transFigure,
	        fontsize=8, color="#58A6FF",
	        fontfamily="monospace", verticalalignment="bottom",
	        bbox=dict(boxstyle="round,pad=0.6", facecolor="#161B22",
	                  edgecolor="#30363D", linewidth=1),
	    )

	    # ── Legend ──────────────────────────────────────────────────────
	    legend_items = [
	        ("#4CAF50", "Input"), ("#2196F3", "Embeddings"), ("#FF9800", "Attention"),
	        ("#8BC34A", "FFN"), ("#9C27B0", "×22 Layers"), ("#E91E63", "Norm"),
	        ("#F44336", "Output"),
	    ]
	    for j, (legend_color, legend_label) in enumerate(legend_items):
	        ax.text2D(
	            0.92, 0.30 - j * 0.025, f"● {legend_label}",
	            transform=fig.transFigure,
	            fontsize=8, color=legend_color, fontfamily="monospace",
	        )

	    # Set axis limits; the extra 8 units on +x leave room for the row labels.
	    all_x = np.concatenate([p[0] for p in all_positions])
	    all_y = np.concatenate([p[1] for p in all_positions])
	    all_z = np.concatenate([p[2] for p in all_positions])
	    margin = 4
	    ax.set_xlim(all_x.min() - margin, all_x.max() + margin + 8)
	    ax.set_ylim(all_y.min() - margin, all_y.max() + margin)
	    ax.set_zlim(all_z.min() - margin, all_z.max() + margin)


	def generate_3d_single_layer(save_path="layer_3d.png", elev=18, azim=-55):
	    """Render a 3D view of the sub-steps inside one transformer layer.

	    The rows run from the layer input through the Q/K/V projections, the
	    attention heads, the attention output, the two residual + norm steps
	    and the feed-forward up/down projections to the layer output. Each row
	    is labelled with its name, its parameter count (when non-zero) and a
	    marker for the neurons that are not drawn.

	    Args:
	        save_path: Output image path passed to ``Figure.savefig``.
	        elev: Camera elevation passed to ``Axes3D.view_init``.
	        azim: Camera azimuth passed to ``Axes3D.view_init``.

	    Returns:
	        None. The image is written to ``save_path`` and the path is printed.
	    """
	    fig = plt.figure(figsize=(22, 18), facecolor="#0a0e17")
	    try:
	        _draw_single_layer(fig, elev, azim)
	        fig.savefig(save_path, dpi=200, bbox_inches="tight",
	                    facecolor="#0a0e17", edgecolor="none")
	        print(f"Saved: {save_path}")
	    finally:
	        # Close this specific figure even when drawing or saving fails.
	        plt.close(fig)


	def _draw_single_layer(fig, elev, azim):
	    """Draw the rows of one transformer layer and the titles onto ``fig``."""
	    background = "#0a0e17"

	    # computed_zorder=False makes matplotlib honor the explicit zorder
	    # values given below instead of sorting artists by depth.
	    ax = fig.add_subplot(111, projection="3d", computed_zorder=False)

	    ax.set_facecolor(background)
	    for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
	        axis.pane.fill = False
	        axis.pane.set_edgecolor(background)
	    ax.grid(False)
	    ax.set_axis_off()
	    ax.view_init(elev=elev, azim=azim)

	    # (name, num_display_nodes, actual_neurons, params, color_hex)
	    # Widths and head count come from the module constants so the labels
	    # cannot drift from the numbers used in the parameter formulas.
	    sub_layers = [
	        (f"Input (d={D})", 10, D, 0, "#2196F3"),
	        (f"Query (d={D})", 10, D, D * D, "#FF6B6B"),
	        (f"Key (d={D})", 10, D, D * D, "#4ECDC4"),
	        (f"Value (d={D})", 10, D, D * D, "#45B7D1"),
	        (f"{HEADS} Attention Heads", HEADS, D, 0, "#FF9800"),
	        (f"Attn Output (d={D})", 10, D, D * D, "#FFA726"),
	        ("⊕ Residual + RMSNorm", 10, D, D, "#E91E63"),
	        (f"FFN Up → GELU (d={D_FF})", 16, D_FF, D * D_FF, "#8BC34A"),
	        (f"FFN Down (d={D})", 10, D, D_FF * D, "#7CB342"),
	        ("⊕ Residual + RMSNorm", 10, D, D, "#E91E63"),
	        (f"Layer Output (d={D})", 10, D, 0, "#2196F3"),
	    ]

	    n = len(sub_layers)
	    y_positions = np.linspace(0, n * 3, n)
	    all_pos = []  # one (xs, ys, zs) triple of coordinate arrays per row

	    for i, (name, n_nodes, actual, params, chex) in enumerate(sub_layers):
	        y = y_positions[i]
	        rgb = hex_to_rgb(chex)

	        # Nodes are spread along x and bent into a shallow arc in z.
	        spread = min(n_nodes * 0.45, 5.5)
	        xs = np.linspace(-spread, spread, n_nodes)
	        zs = -0.12 * (xs ** 2)
	        ys = np.full_like(xs, y)
	        all_pos.append((xs, ys, zs))

	        # Connections to the previous row, sampled to avoid clutter
	        if i > 0:
	            pxs, pys, pzs = all_pos[i - 1]
	            sp = max(1, len(pxs) // 8)
	            sc = max(1, len(xs) // 8)
	            segments = [
	                [(pxs[pi], pys[pi], pzs[pi]), (xs[ci], ys[ci], zs[ci])]
	                for pi in range(0, len(pxs), sp)
	                for ci in range(0, len(xs), sc)
	            ]
	            if segments:
	                # Lines take the color of the row they lead into.
	                line_colors = [(*rgb, 0.15)] * len(segments)
	                ax.add_collection3d(
	                    Line3DCollection(segments, colors=line_colors, linewidths=0.6)
	                )

	        # Nodes, plus a larger and nearly transparent copy as a glow
	        sz = 130 if n_nodes > 12 else 180
	        ax.scatter(xs, ys, zs, c=[chex], s=sz, alpha=0.95,
	                   edgecolors="white", linewidths=0.5, depthshade=True, zorder=5)
	        ax.scatter(xs, ys, zs, c=[chex], s=sz * 3, alpha=0.07,
	                   edgecolors="none", depthshade=True, zorder=4)

	        # Labels
	        lx = xs[-1] + 1.0
	        ax.text(lx, y, 0, name, fontsize=9, fontweight="bold",
	                color="#E6EDF3", fontfamily="monospace", zorder=10)
	        if params > 0:
	            ax.text(lx, y, -0.8, f"{params:,} params",
	                    fontsize=7, color=chex, fontfamily="monospace",
	                    fontweight="bold", zorder=10)

	        # Overflow indicator: how many real neurons are not drawn
	        if actual > n_nodes:
	            ax.text(xs[-1] + 0.4, y, zs[-1], f"(+{actual-n_nodes:,})",
	                    fontsize=6, color="#8B949E", fontfamily="monospace", zorder=10)

	    ax.text2D(0.5, 0.96, "Single Transformer Layer — 3D View",
	              transform=fig.transFigure, fontsize=20, fontweight="bold",
	              color="#E6EDF3", ha="center", fontfamily="monospace")
	    # Subtitle numbers are computed, not typed in, so they track the constants.
	    ax.text2D(0.5, 0.935,
	              f"{layer_p:,} params/layer × {N_LAYERS} layers = {all_layers_p:,} total",
	              transform=fig.transFigure, fontsize=10, color="#8B949E",
	              ha="center", fontfamily="monospace")

	    # Axis limits; the extra room on +x is for the row labels.
	    all_x = np.concatenate([p[0] for p in all_pos])
	    all_y = np.concatenate([p[1] for p in all_pos])
	    all_z = np.concatenate([p[2] for p in all_pos])
	    ax.set_xlim(all_x.min() - 2, all_x.max() + 8)
	    ax.set_ylim(all_y.min() - 2, all_y.max() + 2)
	    ax.set_zlim(all_z.min() - 2, all_z.max() + 2)


	def generate_3d_rotating_views(base_path="viz"):
	    """Write the full set of renders into ``base_path``.

	    Creates ``base_path`` if needed, then saves three camera angles of the
	    whole network followed by one single-layer detail view.

	    Args:
	        base_path: Directory that receives the four PNG files.
	    """
	    import os

	    os.makedirs(base_path, exist_ok=True)

	    # (file name, elevation, azimuth) for each whole-network render
	    network_views = (
	        ("nn_3d_main.png", 12, -15),  # main dramatic angle, more front-facing
	        ("nn_3d_top.png", 35, -25),  # angled view
	        ("nn_3d_side.png", 8, -45),  # side angle
	    )
	    for file_name, elevation, azimuth in network_views:
	        generate_3d_network(
	            f"{base_path}/{file_name}", elev=elevation, azim=azimuth
	        )

	    # Single layer detail
	    generate_3d_single_layer(f"{base_path}/nn_3d_layer.png", elev=18, azim=-55)


	if __name__ == "__main__":
	    import os

	    # Make sure the output directory exists before anything is rendered.
	    os.makedirs("viz", exist_ok=True)

	    # Startup banner with the headline numbers, emitted in one write.
	    rule = "=" * 55
	    banner_lines = [
	        rule,
	        " GPT-300M • 3D Visualization Generator",
	        rule,
	        f" Total parameters: {TOTAL:,}",
	        f" Per layer: {layer_p:,}",
	        f" Layers: {N_LAYERS}",
	        rule,
	    ]
	    print("\n".join(banner_lines))

	    generate_3d_rotating_views("viz")
	    print("\nAll 3D views generated!")