DeMemWM / utils /cluster_utils.py
BonanDing's picture
Initial commit
b47a1ce
"""
Utilities for submitting jobs to compute clusters, such as Slurm.
"""
import os
from omegaconf import DictConfig, OmegaConf
from datetime import datetime
from pathlib import Path
from utils.print_utils import cyan
# Module-level placeholder, initialised to None. The original note says it is
# "set below", but no assignment is visible in this part of the file.
# NOTE(review): confirm where (or whether) REPO_DIR is actually populated.
REPO_DIR = None
def submit_slurm_job(
    cfg: DictConfig,
    python_args: str,
    project_root: Path,
):
    """Render the cluster launch template into a script and submit it with ``sbatch``.

    A fresh, timestamped directory ``<project_root>/slurm_logs/<timestamp>-<cfg.name>``
    is created, ``<project_root>/slurm_logs/latest`` is re-pointed at it, and the
    rendered script is written there as ``job.slurm`` before being submitted.

    Args:
        cfg: Configuration providing ``name``, ``cluster.params`` (extra template
            fields) and ``cluster.launch_template`` (a ``str.format`` template).
        python_args: Value substituted for the ``python_args`` template field.
        project_root: Directory under which ``slurm_logs`` is created; also
            substituted for the ``project_root`` template field.

    Returns:
        The path of the log directory created for this submission.

    Raises:
        subprocess.CalledProcessError: If ``chmod`` or ``sbatch`` exits with a
            non-zero status.
        FileNotFoundError: If the ``chmod`` or ``sbatch`` executable is not found.
    """
    # Imported locally so this function does not depend on a file-level import.
    import subprocess

    logs_root = project_root / "slurm_logs"
    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log_dir = logs_root / f"{timestamp}-{cfg.name}"
    log_dir.mkdir(exist_ok=True, parents=True)

    # Re-point the "latest" symlink at the directory for this submission.
    latest_link = logs_root / "latest"
    latest_link.unlink(missing_ok=True)
    latest_link.symlink_to(log_dir, target_is_directory=True)

    # Built-in template fields first; cluster params are applied afterwards, so
    # they take precedence if a key collides.
    params = dict(name=cfg.name, log_dir=log_dir, project_root=project_root, python_args=python_args)
    params.update(cfg.cluster.params)
    slurm_script = cfg.cluster.launch_template.format(**params)

    slurm_script_path = log_dir / "job.slurm"
    with slurm_script_path.open("w") as f:
        f.write(slurm_script)

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact; check=True surfaces a failed chmod/sbatch instead
    # of silently reporting a submission that never happened.
    subprocess.run(["chmod", "+x", str(slurm_script_path)], check=True)
    subprocess.run(["sbatch", str(slurm_script_path)], check=True)

    print(f"\n{cyan('script:')} {slurm_script_path}\n{cyan('slurm errors and logs:')} {log_dir}\n")

    return log_dir