Spaces:

hesamation
/

primer-llm-embedding

Running

first commit

9940f8d 11 months ago

1.21 kB

	"""
	Prints out the ratio of activation memory for a transformer Block when using ReLU vs GELU.
	"""

	import torch
	import torch.nn as nn

	import act_mem
	import layers

	if __name__ == "__main__":
	    # Script entry point: build one layers.Block per activation function, run a
	    # single forward pass of each on the same CUDA input, and print the
	    # `saved_tensor_mem` value recorded for each block followed by the
	    # ReLU-to-GELU ratio of those values.
	    batch_size, seq_len, d_model, n_heads = 2, 4096, 1024, 2
	    device = "cuda"
	    dtype = torch.bfloat16
	    inputs = torch.randn(
	        batch_size,
	        seq_len,
	        d_model,
	        device=device,
	        requires_grad=True,
	        dtype=dtype,
	    )

	    act_fn_dict = {"ReLU": nn.ReLU(), "GELU": nn.GELU()}
	    # Append outputs to a list to keep tensors alive
	    outputs = []
	    # Keyed by activation name so the final ratio does not depend on the
	    # iteration order of act_fn_dict.
	    mem_bytes = {}

	    for name, act_fn in act_fn_dict.items():
	        block = layers.Block(
	            d_model=d_model,
	            act_fn=act_fn,
	            n_heads=n_heads,
	            device=device,
	            dtype=dtype,
	        )
	        # AllocatedMemContext is entered for the forward pass; its result is
	        # not used below, so it is left unbound. The block's own parameters
	        # are passed as `ignored_tensors` to SavedTensorContext.
	        with act_mem.AllocatedMemContext(), act_mem.SavedTensorContext(
	            ignored_tensors=block.parameters()
	        ) as saved:
	            out = block(inputs)
	            outputs.append(out)
	        print(f"{name} block bytes: {saved.saved_tensor_mem}")
	        mem_bytes[name] = saved.saved_tensor_mem

	    ratio = mem_bytes["ReLU"] / mem_bytes["GELU"]
	    print(f"ReLU/GeLU block act mem ratio: {ratio}")