File size: 1,194 Bytes
101858b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""

Memory Optimization Configuration

Configuration for the unified memory management system.

"""
import torch
from dataclasses import dataclass
from typing import Optional

@dataclass
class MemoryOptimizationConfig:
    """Settings bundle for the unified memory management system.

    Every field has a default, so ``MemoryOptimizationConfig()`` produces a
    usable configuration; override individual values via keyword arguments.

    Note:
        The default for ``device`` is computed once, when the class body is
        executed (i.e. at import time), not each time an instance is created.
    """

    # --- Device -----------------------------------------------------------
    # "cuda" when torch reports an available CUDA device, otherwise "cpu".
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

    # --- Memory thresholds (fractions, 0.85 == 85%) -------------------------
    # Memory usage threshold.
    memory_threshold: float = 0.85
    # Lower threshold used for proactive cleanup.
    cleanup_threshold: float = 0.75

    # --- Tensor pool --------------------------------------------------------
    # Pool capacity; kept small for an RTX 3060.
    max_pool_size: int = 50
    # Very large tensors are not pooled; this is the size cut-off.
    # NOTE(review): unit (elements vs. bytes) is not visible here - confirm
    # against the pool implementation.
    max_tensor_size: int = 1_000_000

    # --- Cleanup ------------------------------------------------------------
    # Run cleanup every N operations.
    cleanup_frequency: int = 25

    # --- Memory optimization flags ------------------------------------------
    use_4bit_quantization: bool = True
    use_gradient_checkpointing: bool = True
    use_mixed_precision: bool = True

    # --- Shared model integration -------------------------------------------
    # Whether to use the shared Qwen model.
    use_shared_model: bool = True
    # Name of the shared model used by default.
    shared_model_name: str = "Qwen/Qwen3-0.6B"

    # --- Lazy loading -------------------------------------------------------
    enable_lazy_loading: bool = True