# vdpm/check_model_size.py
# Uploaded by dxm21 via huggingface_hub (commit b678162, verified).
# Standard-library imports first, third-party (torch) after, per PEP 8.
import sys
from pathlib import Path

import torch

# Make this script's own directory importable (for the local `dpm` package),
# regardless of the working directory the script is launched from.
sys.path.insert(0, str(Path(__file__).parent))
def check_model_memory():
    """Estimate VDPM memory needs and report whether it fits an 8 GB GPU.

    Instantiates the model on CPU with a minimal config, counts its
    parameters, and prints rough memory estimates for the weights
    (FP32/FP16/BF16/INT8), a representative input tensor, and forward-pass
    activations. If CUDA is available, also prints the actual device
    memory state. All results go to stdout; nothing is returned.
    """
    # Minimal stand-in for the project's config object; only
    # cfg.model.decoder_depth appears to be needed here -- TODO confirm
    # against dpm.model.VDPM's constructor.
    class SimpleConfig:
        class ModelConfig:
            decoder_depth = 4
        model = ModelConfig()

    cfg = SimpleConfig()

    # Deferred import: relies on sys.path having been extended at module load.
    from dpm.model import VDPM

    # Create the model on CPU first so parameters can be counted without
    # touching VRAM.
    print("Creating model...")
    model = VDPM(cfg)

    # Parameter counts.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"\n{'='*60}")
    print("MODEL SIZE ANALYSIS FOR RTX 3070 Ti (8GB)")
    print(f"{'='*60}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    # Weight memory: bytes-per-parameter times parameter count.
    print("\nEstimated model weights memory:")
    print(f" - FP32 (float32): {total_params * 4 / 1024**3:.2f} GB")
    print(f" - FP16 (float16): {total_params * 2 / 1024**3:.2f} GB")
    print(f" - BF16 (bfloat16): {total_params * 2 / 1024**3:.2f} GB")
    print(f" - INT8 (quantized): {total_params * 1 / 1024**3:.2f} GB <-- RECOMMENDED for 8GB GPU")

    # Representative inference workload for the activation estimates.
    batch_size = 1
    num_frames = 5  # typical video length
    img_size = 518

    print(f"\nEstimated activation memory (batch={batch_size}, frames={num_frames}, img_size={img_size}):")
    # Input images: [B, S, 3, H, W] in FP32 (4 bytes per element).
    input_mem = batch_size * num_frames * 3 * img_size * img_size * 4 / 1024**3
    print(f" - Input images (FP32): {input_mem:.2f} GB")
    # Rough estimate for activations (can be 2-4x model size during forward pass).
    activation_mem_estimate = total_params * 2 * 3 / 1024**3  # conservative estimate
    print(f" - Activations (estimate): {activation_mem_estimate:.2f} GB")

    # Totals for the two practical precision modes.
    total_fp16 = (total_params * 2 / 1024**3) + input_mem + activation_mem_estimate
    total_int8 = (total_params * 1 / 1024**3) + input_mem + (activation_mem_estimate * 0.6)  # INT8 reduces activations too

    print("\nTotal estimated GPU memory needed:")
    print(f" - With FP16/BF16: {total_fp16:.2f} GB")
    print(f" - With INT8 quantization: {total_int8:.2f} GB <-- FITS IN 8GB!")
    print("Your RTX 3070 Ti has: 8 GB VRAM")

    if total_int8 <= 8:
        print("\n✓ With INT8 quantization, model will fit in GPU memory!")
        print(" Set USE_QUANTIZATION = True in gradio_demo.py")
    elif total_fp16 > 8:
        # NOTE(review): reached only when total_int8 > 8; since total_fp16 is
        # always >= total_int8 by construction, the trailing else branch is
        # effectively unreachable -- kept for parity with the original logic.
        print(f"\n⚠️ WARNING: Even with INT8 ({total_int8:.2f} GB), memory is tight")
        print(" Recommendations:")
        print(" 1. Use INT8 quantization (USE_QUANTIZATION = True)")
        print(f" 2. Reduce number of input frames to {num_frames} or fewer")
        print(" 3. Clear CUDA cache between batches")
    else:
        print("\n✓ Model should fit with FP16!")

    print(f"{'='*60}\n")

    # Report the actual device memory state when a CUDA GPU is present.
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        print(f"Current GPU memory allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        print(f"Current GPU memory cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
# Script entry point: run the analysis only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    check_model_memory()