import sys
from pathlib import Path

import torch

# Make sibling project modules (e.g. ``dpm``) importable when this file
# is executed directly as a script rather than as part of a package.
sys.path.insert(0, str(Path(__file__).parent))
|
def check_model_memory(gpu_mem_gb: float = 8.0):
    """Instantiate the VDPM model and print a GPU memory budget report.

    Builds a minimal config, creates the model, counts its parameters, and
    prints rough memory estimates for FP32/FP16/BF16/INT8 weights plus
    activations, compared against the available VRAM.

    Args:
        gpu_mem_gb: Available GPU VRAM in GB. Defaults to 8.0
            (RTX 3070 Ti), matching the original hard-coded budget.
    """

    # Minimal stand-in for the project's config object; only
    # cfg.model.decoder_depth is supplied here -- TODO confirm VDPM needs
    # nothing else from the config.
    class SimpleConfig:
        class ModelConfig:
            decoder_depth = 4

        model = ModelConfig()

    cfg = SimpleConfig()

    # Imported lazily so the sys.path tweak at module top level has already
    # run by the time this executes as a script.
    from dpm.model import VDPM

    print("Creating model...")
    model = VDPM(cfg)

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    gib = 1024 ** 3  # bytes per GiB, hoisted out of the repeated divisions

    print(f"\n{'=' * 60}")
    print(f"MODEL SIZE ANALYSIS FOR {gpu_mem_gb:g} GB GPU")
    print(f"{'=' * 60}")
    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    print("\nEstimated model weights memory:")
    print(f"  - FP32 (float32): {total_params * 4 / gib:.2f} GB")
    print(f"  - FP16 (float16): {total_params * 2 / gib:.2f} GB")
    print(f"  - BF16 (bfloat16): {total_params * 2 / gib:.2f} GB")
    print(f"  - INT8 (quantized): {total_params * 1 / gib:.2f} GB")

    # Representative inference workload used for the activation estimate.
    batch_size = 1
    num_frames = 5
    img_size = 518
    print(f"\nEstimated activation memory (batch={batch_size}, frames={num_frames}, img_size={img_size}):")

    # FP32 input tensor: B x F x 3 channels x H x W x 4 bytes.
    input_mem = batch_size * num_frames * 3 * img_size * img_size * 4 / gib
    print(f"  - Input images (FP32): {input_mem:.2f} GB")

    # Very rough heuristic: activations ~ 3x the FP16 weight footprint.
    activation_mem_estimate = total_params * 2 * 3 / gib
    print(f"  - Activations (estimate): {activation_mem_estimate:.2f} GB")

    total_fp16 = (total_params * 2 / gib) + input_mem + activation_mem_estimate
    # INT8 assumed to shrink activations to ~60% of the FP16 estimate.
    total_int8 = (total_params * 1 / gib) + input_mem + (activation_mem_estimate * 0.6)

    print("\nTotal estimated GPU memory needed:")
    print(f"  - With FP16/BF16: {total_fp16:.2f} GB")
    # NOTE: the fit verdict is printed below, after the comparison --
    # previously this line claimed "FITS IN 8GB!" unconditionally.
    print(f"  - With INT8 quantization: {total_int8:.2f} GB")
    print(f"Your GPU has: {gpu_mem_gb:g} GB VRAM")

    # total_int8 < total_fp16 by construction, so the checks must run from
    # the largest footprint downward; the previous elif ordering made the
    # "fits with FP16" branch unreachable.
    if total_fp16 <= gpu_mem_gb:
        print("\n✓ Model should fit with FP16!")
    elif total_int8 <= gpu_mem_gb:
        print("\n✓ With INT8 quantization, model will fit in GPU memory!")
        print("  Set USE_QUANTIZATION = True in gradio_demo.py")
    else:
        print(f"\n⚠️ WARNING: Even with INT8 ({total_int8:.2f} GB), memory is tight")
        print("  Recommendations:")
        print("  1. Use INT8 quantization (USE_QUANTIZATION = True)")
        # Recommending the current frame count back to the user is useless;
        # suggest going below it.
        print(f"  2. Reduce number of input frames to fewer than {num_frames}")
        print("  3. Clear CUDA cache between batches")

    print(f"{'=' * 60}\n")

    # Report live CUDA device stats when a GPU is actually present.
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / gib:.2f} GB")
        print(f"Current GPU memory allocated: {torch.cuda.memory_allocated() / gib:.2f} GB")
        print(f"Current GPU memory cached: {torch.cuda.memory_reserved() / gib:.2f} GB")
|
| | if __name__ == "__main__":
|
| | check_model_memory()
|
| |
|
| |
|