| |
| |
| |
| |
| |
|
|
| """ |
| Utility functions for SLURM configuration and cluster settings. |
| """ |
|
|
| from enum import Enum |
| import os |
| import socket |
| import typing as tp |
|
|
| import omegaconf |
|
|
|
|
class ClusterType(Enum):
    """Identifiers for the kinds of cluster / host this code can run on.

    Each member's value is the lowercase string identifier for that
    environment.
    """

    # Remote cluster environments.
    AWS = "aws"
    FAIR = "fair"
    RSC = "rsc"
    # Local macOS machine (the kernel reports itself as "Darwin").
    LOCAL_DARWIN = "darwin"
    # Catch-all used when no specific environment is recognised.
    DEFAULT = "default"
|
|
|
|
def _guess_cluster_type() -> ClusterType:
    """Infer the cluster type from the local OS and fully qualified hostname.

    Checks are applied in order and the first match wins:

    1. Linux with a kernel release ending in ``-aws`` or an FQDN containing
       ``.ec2`` -> ``ClusterType.AWS``
    2. FQDN ending in ``.fair`` -> ``ClusterType.FAIR``
    3. FQDN ending in ``.facebook.com`` -> ``ClusterType.RSC``
    4. Darwin (macOS) -> ``ClusterType.LOCAL_DARWIN``
    5. anything else -> ``ClusterType.DEFAULT``

    Returns:
        The detected `ClusterType`.
    """
    # `os.uname()` only exists on Unix, so calling it on Windows raised
    # AttributeError instead of falling through to DEFAULT. `platform`
    # reports the same system/release strings and works on every OS.
    import platform

    system = platform.system()
    release = platform.release()
    fqdn = socket.getfqdn()

    if system == "Linux" and (release.endswith("-aws") or ".ec2" in fqdn):
        return ClusterType.AWS

    if fqdn.endswith(".fair"):
        return ClusterType.FAIR

    # NOTE: this hostname test runs before the Darwin test, so a macOS host
    # whose FQDN ends in ".facebook.com" is classified as RSC.
    if fqdn.endswith(".facebook.com"):
        return ClusterType.RSC

    if system == "Darwin":
        return ClusterType.LOCAL_DARWIN

    return ClusterType.DEFAULT
|
|
|
|
def get_cluster_type(
    cluster_type: tp.Optional[ClusterType] = None,
) -> tp.Optional[ClusterType]:
    """Return the cluster type to use.

    Args:
        cluster_type: Explicit cluster type. When ``None``, the type is
            guessed from the current host via `_guess_cluster_type`.

    Returns:
        `cluster_type` unchanged if it was provided, otherwise the guessed
        cluster type.
    """
    return _guess_cluster_type() if cluster_type is None else cluster_type
|
|
|
|
def get_slurm_parameters(
    cfg: omegaconf.DictConfig, cluster_type: tp.Optional[ClusterType] = None
) -> omegaconf.DictConfig:
    """Update SLURM parameters in the configuration based on the cluster type.

    If the cluster type is not specified, it is inferred automatically.

    Args:
        cfg: SLURM configuration. It is modified in place.
        cluster_type: Cluster to configure for, or ``None`` to auto-detect.

    Returns:
        The same `cfg` object, after the cluster-specific overrides and the
        optional ``exclude`` entry have been applied.
    """
    # Imported at call time rather than at module level.
    from ..environment import AudioCraftEnvironment

    resolved_type = get_cluster_type(cluster_type)

    # Per-cluster overrides, listed in the order they are written to `cfg`.
    overrides: tp.Dict[str, tp.Any] = {}
    if resolved_type == ClusterType.AWS:
        overrides = {
            "mem_per_gpu": None,
            "constraint": None,
            "setup": [],
        }
    elif resolved_type == ClusterType.RSC:
        overrides = {
            "mem_per_gpu": None,
            "setup": [],
            "constraint": None,
            "partition": "learn",
        }

    for key, value in overrides.items():
        cfg[key] = value

    # Applied for every cluster type, but only when the environment
    # provides a value.
    excluded = AudioCraftEnvironment.get_slurm_exclude()
    if excluded is not None:
        cfg["exclude"] = excluded

    return cfg
|
|