File size: 4,181 Bytes
35aaa09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# GPU Configuration
# Docking@HOME - CUDA/CUDPP Settings

# === GPU Selection ===
use_gpu = true                     # Enable GPU acceleration
gpu_device_id = 0                  # GPU device ID to use (-1 = auto-detect best)
use_multiple_gpus = false          # Use multiple GPUs if available
gpu_ids = [0]                      # List of GPU IDs to use (if use_multiple_gpus = true)

# === CUDA Settings ===
cuda_device_name = "auto"          # CUDA device name (auto = auto-detect)
cuda_compute_capability = "auto"   # Compute capability (auto, 5.0, 6.1, 7.0, 7.5, 8.0, 8.6, 9.0)
cuda_threads_per_block = 256       # Threads per block (64, 128, 256, 512, 1024)
cuda_blocks_per_grid = 128         # Blocks per grid (32, 64, 128, 256)

# === Memory Settings ===
gpu_memory_limit_mb = 0            # GPU memory limit in MB (0 = auto: use whatever memory is available)
host_memory_pinned = true          # Use pinned host memory for faster transfers
cache_maps_on_gpu = true           # Cache grid maps on GPU memory

# === Performance Tuning ===
# AutoDock-GPU-specific settings
energy_eval_per_gpu_call = 1024    # Energy evaluations per GPU kernel call
ls_method = "sw"                   # Local search method: sw (Solis-Wets), sd (Steepest Descent), fire (FIRE, Fast Inertial Relaxation Engine)
population_size = 150              # Population size for genetic algorithm
num_generations = 27000            # Number of generations

# === CUDPP Settings ===
use_cudpp = true                   # Use CUDPP for GPU primitives
cudpp_sort_algorithm = "radix"     # Sort algorithm: radix, merge, quick
cudpp_scan_algorithm = "efficient" # Scan algorithm: efficient, work-efficient

# === Optimization Flags ===
optimize_for_speed = true          # Favor speed over accuracy
use_fast_math = true               # Use fast math operations (less precise)
use_texture_memory = true          # Use texture memory for grid maps
async_execution = true             # Asynchronous kernel execution

# === Multi-GPU Load Balancing ===
load_balance_strategy = "dynamic"  # Load-balancing strategy: static, dynamic, round-robin
tasks_per_gpu_min = 10             # Minimum tasks per GPU
tasks_per_gpu_max = 100            # Maximum tasks per GPU

# === Error Handling ===
retry_on_gpu_error = true          # Retry on GPU errors
max_gpu_retries = 3                # Maximum retry attempts
fallback_to_cpu = true             # Fall back to CPU on GPU failure

# === Thermal Management ===
enable_thermal_monitoring = true   # Monitor GPU temperature
max_gpu_temperature = 85           # Maximum GPU temperature (°C)
throttle_at_temperature = 80       # Start throttling at this temperature (°C)
shutdown_at_temperature = 90       # Emergency shutdown temperature (°C)
check_temperature_interval = 10    # Seconds between temperature checks

# === Power Management ===
gpu_power_limit_watts = 0          # Power limit in watts (0 = default)
enable_power_monitoring = true     # Monitor power consumption

# === Debugging ===
verbose_gpu_output = false         # Enable verbose GPU output
profile_gpu_kernels = false        # Profile GPU kernel execution times
save_gpu_debug_info = false        # Save debug information
cuda_error_checking = true         # Enable CUDA error checking (slower)

# === Compatibility ===
force_cpu_mode = false             # Force CPU mode even if a GPU is available
gpu_driver_version_min = "450.0"   # Minimum GPU driver version
cuda_runtime_version_min = "11.0"  # Minimum CUDA runtime version

# === Specific GPU Optimizations ===
# NVIDIA RTX 30xx Series
rtx30xx_optimized = false
# NVIDIA RTX 40xx Series
rtx40xx_optimized = false
# AMD RDNA2/3
amd_rdna_optimized = false

# === Benchmark Settings ===
run_benchmark_on_startup = false   # Run benchmark on startup
benchmark_duration_seconds = 30    # Benchmark duration
save_benchmark_results = true      # Save benchmark results

# === Advanced CUDA Settings ===
cuda_stream_count = 2              # Number of CUDA streams
cuda_graph_enabled = false         # Use CUDA graphs (requires CUDA 10 or newer)
cooperative_groups = false         # Use cooperative groups
unified_memory = false             # Use CUDA unified memory