import torch
import sys
from pathlib import Path

# Add parent directory to path so the local `dpm` package resolves.
sys.path.insert(0, str(Path(__file__).parent))


def check_model_memory():
    """Estimate GPU memory requirements of the VDPM model and print advice.

    Builds the model on CPU, counts its parameters, prints rough weight-memory
    estimates for FP32/FP16/BF16/INT8 plus a coarse activation estimate, and
    recommends a precision mode for an 8 GB GPU (RTX 3070 Ti). If CUDA is
    available, also reports the actual device name and current memory usage.

    Returns:
        None. All results are printed to stdout.
    """
    # Minimal stand-in for the project's config object; only the attribute
    # VDPM reads (model.decoder_depth) is provided.
    class SimpleConfig:
        class ModelConfig:
            decoder_depth = 4
        model = ModelConfig()
    cfg = SimpleConfig()

    # Import after sys.path is set so the local package is found.
    from dpm.model import VDPM

    # Create model on CPU first to count parameters without touching VRAM.
    print("Creating model...")
    model = VDPM(cfg)

    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"\n{'='*60}")
    print(f"MODEL SIZE ANALYSIS FOR RTX 3070 Ti (8GB)")
    print(f"{'='*60}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print(f"\nEstimated model weights memory:")
    # Bytes per parameter: FP32=4, FP16/BF16=2, INT8=1; 1024**3 bytes per GB.
    print(f" - FP32 (float32): {total_params * 4 / 1024**3:.2f} GB")
    print(f" - FP16 (float16): {total_params * 2 / 1024**3:.2f} GB")
    print(f" - BF16 (bfloat16): {total_params * 2 / 1024**3:.2f} GB")
    print(f" - INT8 (quantized): {total_params * 1 / 1024**3:.2f} GB <-- RECOMMENDED for 8GB GPU")

    # Estimate activation memory for typical input
    batch_size = 1
    num_frames = 5  # typical video length
    img_size = 518

    print(f"\nEstimated activation memory (batch={batch_size}, frames={num_frames}, img_size={img_size}):")
    # Input images: [B, S, 3, H, W] in FP32 (4 bytes per element).
    input_mem = batch_size * num_frames * 3 * img_size * img_size * 4 / 1024**3
    print(f" - Input images (FP32): {input_mem:.2f} GB")

    # Rough estimate for activations (can be 2-4x model size during forward pass)
    activation_mem_estimate = total_params * 2 * 3 / 1024**3  # conservative estimate
    print(f" - Activations (estimate): {activation_mem_estimate:.2f} GB")

    # Calculate total for different precision modes
    total_fp16 = (total_params * 2 / 1024**3) + input_mem + activation_mem_estimate
    total_int8 = (total_params * 1 / 1024**3) + input_mem + (activation_mem_estimate * 0.6)  # INT8 reduces activations too

    print(f"\nTotal estimated GPU memory needed:")
    print(f" - With FP16/BF16: {total_fp16:.2f} GB")
    print(f" - With INT8 quantization: {total_int8:.2f} GB <-- FITS IN 8GB!")
    print(f"Your RTX 3070 Ti has: 8 GB VRAM")

    # NOTE: check FP16 first — total_fp16 > total_int8 always holds, so
    # testing INT8 first (as the original did) made the FP16 branch
    # unreachable. Order: FP16 fits -> use FP16; else INT8 fits -> quantize;
    # else warn that even INT8 is tight.
    if total_fp16 <= 8:
        print(f"\n✓ Model should fit with FP16!")
    elif total_int8 <= 8:
        print(f"\n✓ With INT8 quantization, model will fit in GPU memory!")
        print(f" Set USE_QUANTIZATION = True in gradio_demo.py")
    else:
        print(f"\n⚠️ WARNING: Even with INT8 ({total_int8:.2f} GB), memory is tight")
        print(f" Recommendations:")
        print(f" 1. Use INT8 quantization (USE_QUANTIZATION = True)")
        print(f" 2. Reduce number of input frames to {num_frames} or fewer")
        print(f" 3. Clear CUDA cache between batches")

    print(f"{'='*60}\n")

    # Check actual GPU memory if CUDA available
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        print(f"Current GPU memory allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
        print(f"Current GPU memory cached: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")


if __name__ == "__main__":
    check_model_memory()