# vdpm/check_model_size.py
# Uploaded by dxm21 via huggingface_hub (commit b678162, verified).
# Standard-library imports first, third-party (torch) after, per PEP 8.
import sys
from pathlib import Path

import torch

# Make this script's own directory importable (for the local `dpm` package),
# regardless of the working directory the script is launched from.
sys.path.insert(0, str(Path(__file__).parent))
def check_model_memory():
    """Estimate VDPM memory needs and report whether it fits an 8 GB GPU.

    Instantiates the model on CPU with a minimal config, counts its
    parameters, and prints rough memory estimates for the weights
    (FP32/FP16/BF16/INT8), a representative input tensor, and forward-pass
    activations. If CUDA is available, also prints the actual device
    memory state. All results go to stdout; nothing is returned.
    """
    # Minimal stand-in for the project's config object; only
    # cfg.model.decoder_depth appears to be needed here -- TODO confirm
    # against dpm.model.VDPM's constructor.
    class SimpleConfig:
        class ModelConfig:
            decoder_depth = 4
        model = ModelConfig()

    cfg = SimpleConfig()

    # Deferred import: relies on sys.path having been extended at module load.
    from dpm.model import VDPM

    # Create the model on CPU first so parameters can be counted without
    # touching VRAM.
    print("Creating model...")
    model = VDPM(cfg)

    # Parameter counts.
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"\n{'='*60}")
    print("MODEL SIZE ANALYSIS FOR RTX 3070 Ti (8GB)")
    print(f"{'='*60}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    # Weight memory: bytes-per-parameter times parameter count.
    print("\nEstimated model weights memory:")
    print(f" - FP32 (float32): {total_params * 4 / 1024**3:.2f} GB")
    print(f" - FP16 (float16): {total_params * 2 / 1024**3:.2f} GB")
    print(f" - BF16 (bfloat16): {total_params * 2 / 1024**3:.2f} GB")
    print(f" - INT8 (quantized): {total_params * 1 / 1024**3:.2f} GB <-- RECOMMENDED for 8GB GPU")

    # Representative inference workload for the activation estimates.
    batch_size = 1
    num_frames = 5  # typical video length
    img_size = 518

    print(f"\nEstimated activation memory (batch={batch_size}, frames={num_frames}, img_size={img_size}):")
    # Input images: [B, S, 3, H, W] in FP32 (4 bytes per element).
    input_mem = batch_size * num_frames * 3 * img_size * img_size * 4 / 1024**3
    print(f" - Input images (FP32): {input_mem:.2f} GB")
    # Rough estimate for activations (can be 2-4x model size during forward pass).
    activation_mem_estimate = total_params * 2 * 3 / 1024**3  # conservative estimate
    print(f" - Activations (estimate): {activation_mem_estimate:.2f} GB")

    # Totals for the two practical precision modes.
    total_fp16 = (total_params * 2 / 1024**3) + input_mem + activation_mem_estimate
    total_int8 = (total_params * 1 / 1024**3) + input_mem + (activation_mem_estimate * 0.6)  # INT8 reduces activations too

    print("\nTotal estimated GPU memory needed:")
    print(f" - With FP16/BF16: {total_fp16:.2f} GB")
    print(f" - With INT8 quantization: {total_int8:.2f} GB <-- FITS IN 8GB!")
    print("Your RTX 3070 Ti has: 8 GB VRAM")

    if total_int8 <= 8:
        print("\n✓ With INT8 quantization, model will fit in GPU memory!")
        print(" Set USE_QUANTIZATION = True in gradio_demo.py")
    elif total_fp16 > 8:
        # NOTE(review): reached only when total_int8 > 8; since total_fp16 is
        # always >= total_int8 by construction, the trailing else branch is
        # effectively unreachable -- kept for parity with the original logic.
        print(f"\n⚠️ WARNING: Even with INT8 ({total_int8:.2f} GB), memory is tight")
        print(" Recommendations:")
        print(" 1. Use INT8 quantization (USE_QUANTIZATION = True)")
        print(f" 2. Reduce number of input frames to {num_frames} or fewer")
        print(" 3. Clear CUDA cache between batches")
    else:
        print("\n✓ Model should fit with FP16!")

    print(f"{'='*60}\n")

    # Report the actual device memory state when a CUDA GPU is present.
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        print(f"Current GPU memory allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        print(f"Current GPU memory cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
# Script entry point: run the analysis only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    check_model_memory()