fix(config): remove mem_per_worker_gib from config files and calculate dynamically in resource_probe script
Browse files
- README.md +0 -1
- configs/eda_config_template.yaml +0 -1
- configs/eda_optimized.yaml +1 -1
- scripts/resource_probe.py +16 -9
README.md
CHANGED
|
@@ -131,7 +131,6 @@ The pipeline respects your resource limits and adapts processing strategy by dat
|
|
| 131 |
resources:
|
| 132 |
max_memory_gib: 240 # Total memory available
|
| 133 |
max_workers: 42 # Maximum parallel workers
|
| 134 |
-
mem_per_worker_gib: 5.5 # Memory per worker
|
| 135 |
chunk_size: 12288 # Matrix chunk size
|
| 136 |
|
| 137 |
dataset_thresholds:
|
|
|
|
| 131 |
resources:
|
| 132 |
max_memory_gib: 240 # Total memory available
|
| 133 |
max_workers: 42 # Maximum parallel workers
|
|
|
|
| 134 |
chunk_size: 12288 # Matrix chunk size
|
| 135 |
|
| 136 |
dataset_thresholds:
|
configs/eda_config_template.yaml
CHANGED
|
@@ -5,7 +5,6 @@
|
|
| 5 |
resources:
|
| 6 |
max_memory_gib: 256 # Total memory available
|
| 7 |
max_workers: 32 # Maximum concurrent workers
|
| 8 |
-
mem_per_worker_gib: 8.0 # Memory per worker process
|
| 9 |
chunk_size: 8192 # Chunk size for reading X matrix
|
| 10 |
|
| 11 |
# Input/Output Paths
|
|
|
|
| 5 |
resources:
|
| 6 |
max_memory_gib: 256 # Total memory available
|
| 7 |
max_workers: 32 # Maximum concurrent workers
|
|
|
|
| 8 |
chunk_size: 8192 # Chunk size for reading X matrix
|
| 9 |
|
| 10 |
# Input/Output Paths
|
configs/eda_optimized.yaml
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
|
| 5 |
resources:
|
| 6 |
max_memory_gib: 200 # Leave ~10 GB buffer for system
|
| 7 |
-
max_workers:
|
| 8 |
chunk_size: 12288 # Good balance for large matrices
|
| 9 |
|
| 10 |
paths:
|
|
|
|
| 4 |
|
| 5 |
resources:
|
| 6 |
max_memory_gib: 200 # Leave ~10 GB buffer for system
|
| 7 |
+
max_workers: 24 # Based on actual RAM availability
|
| 8 |
chunk_size: 12288 # Good balance for large matrices
|
| 9 |
|
| 10 |
paths:
|
scripts/resource_probe.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
-
"""Probe local HPC resources and suggest safe EDA concurrency settings."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
from __future__ import annotations
|
| 5 |
|
|
@@ -9,6 +13,7 @@ import os
|
|
| 9 |
import platform
|
| 10 |
import shutil
|
| 11 |
from pathlib import Path
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def _mem_available_gib() -> float:
|
|
@@ -48,14 +53,16 @@ def main() -> None:
|
|
| 48 |
default=Path("/project/GOV108018"),
|
| 49 |
help="Path to check disk usage for.",
|
| 50 |
)
|
| 51 |
-
parser.add_argument(
|
| 52 |
-
"--mem-per-worker-gib",
|
| 53 |
-
type=float,
|
| 54 |
-
default=8.0,
|
| 55 |
-
help="Memory budget per EDA worker to compute a safe recommendation.",
|
| 56 |
-
)
|
| 57 |
args = parser.parse_args()
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
cpu_count = os.cpu_count() or 1
|
| 60 |
mem_total_gib = _mem_total_gib()
|
| 61 |
mem_available_gib = _mem_available_gib()
|
|
@@ -64,7 +71,7 @@ def main() -> None:
|
|
| 64 |
recommended_workers = _recommend_workers(
|
| 65 |
cpu_count=cpu_count,
|
| 66 |
mem_available_gib=mem_available_gib,
|
| 67 |
-
mem_per_worker_gib=args.mem_per_worker_gib,
|
| 68 |
)
|
| 69 |
recommended_shards = max(1, min(8, cpu_count // max(1, recommended_workers)))
|
| 70 |
|
|
@@ -77,7 +84,7 @@ def main() -> None:
|
|
| 77 |
"disk_total_gib": round(disk_total / (1024**3), 2),
|
| 78 |
"disk_used_gib": round(disk_used / (1024**3), 2),
|
| 79 |
"disk_free_gib": round(disk_free / (1024**3), 2),
|
| 80 |
-
"assumptions": {"mem_per_worker_gib": args.mem_per_worker_gib},
|
| 81 |
"recommendation": {
|
| 82 |
"workers_per_node": recommended_workers,
|
| 83 |
"num_shards_suggestion": recommended_shards,
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
"""Probe local HPC resources and suggest safe EDA concurrency settings.
|
| 3 |
+
|
| 4 |
+
This script dynamically calculates memory per worker from the EDA config file
|
| 5 |
+
(configs/eda_optimized.yaml) using max_memory_gib / max_workers.
|
| 6 |
+
"""
|
| 7 |
|
| 8 |
from __future__ import annotations
|
| 9 |
|
|
|
|
| 13 |
import platform
|
| 14 |
import shutil
|
| 15 |
from pathlib import Path
|
| 16 |
+
import yaml
|
| 17 |
|
| 18 |
|
| 19 |
def _mem_available_gib() -> float:
|
|
|
|
| 53 |
default=Path("/project/GOV108018"),
|
| 54 |
help="Path to check disk usage for.",
|
| 55 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
args = parser.parse_args()
|
| 57 |
|
| 58 |
+
# Read config to calculate dynamic mem_per_worker_gib
|
| 59 |
+
config_path = Path(__file__).parent.parent / "configs" / "eda_optimized.yaml"
|
| 60 |
+
with open(config_path) as f:
|
| 61 |
+
config = yaml.safe_load(f)
|
| 62 |
+
max_memory_gib = config['resources']['max_memory_gib']
|
| 63 |
+
max_workers = config['resources']['max_workers']
|
| 64 |
+
mem_per_worker_gib = max_memory_gib / max_workers
|
| 65 |
+
|
| 66 |
cpu_count = os.cpu_count() or 1
|
| 67 |
mem_total_gib = _mem_total_gib()
|
| 68 |
mem_available_gib = _mem_available_gib()
|
|
|
|
| 71 |
recommended_workers = _recommend_workers(
|
| 72 |
cpu_count=cpu_count,
|
| 73 |
mem_available_gib=mem_available_gib,
|
| 74 |
+
mem_per_worker_gib=mem_per_worker_gib,
|
| 75 |
)
|
| 76 |
recommended_shards = max(1, min(8, cpu_count // max(1, recommended_workers)))
|
| 77 |
|
|
|
|
| 84 |
"disk_total_gib": round(disk_total / (1024**3), 2),
|
| 85 |
"disk_used_gib": round(disk_used / (1024**3), 2),
|
| 86 |
"disk_free_gib": round(disk_free / (1024**3), 2),
|
| 87 |
+
"assumptions": {"mem_per_worker_gib": mem_per_worker_gib},
|
| 88 |
"recommendation": {
|
| 89 |
"workers_per_node": recommended_workers,
|
| 90 |
"num_shards_suggestion": recommended_shards,
|