| import os |
| from joblib import cpu_count |
|
|
|
|
# Module-level cache for the result of cpu_count: the value is not expected to
# change during the lifecycle of a Python program, so it is computed at most
# once per flag value. The dictionary is keyed by only_physical_cores and is
# filled lazily by _openmp_effective_n_threads on the first lookup miss.
_CPU_COUNTS = {}
|
|
|
|
def _openmp_parallelism_enabled():
    """Tell whether this build has OpenMP parallelism enabled.

    The answer is decided when the extension is compiled; this helper only
    makes that compile-time information retrievable at runtime.
    """
    # The flag below is a boolean resolved at compile time and defined in
    # _openmp_helpers.pxd; returning it is how it gets exposed to Python code.
    return SKLEARN_OPENMP_PARALLELISM_ENABLED
|
|
|
|
cpdef _openmp_effective_n_threads(n_threads=None, only_physical_cores=True):
    """Resolve the number of threads that OpenMP parallel calls should use.

    The outcome depends on ``n_threads``:

    - ``n_threads = None``:
      - when the ``OMP_NUM_THREADS`` environment variable is set, the result
        is ``openmp.omp_get_max_threads()``;
      - otherwise the result is the smaller of
        ``openmp.omp_get_max_threads()`` and the number of cpus, taking
        cgroups quotas into account (such quotas are typically set by tools
        like Docker).
      Note that ``omp_get_max_threads`` is itself influenced by the
      ``OMP_NUM_THREADS`` environment variable and, at runtime, by
      ``omp_set_num_threads``.

    - ``n_threads > 0``: the value is returned as is and acts as the maximal
      number of threads for parallel OpenMP calls.

    - ``n_threads < 0``: the maximal number of threads minus
      ``|n_threads + 1|`` is returned, never less than 1. In particular
      ``n_threads = -1`` uses as many threads as there are available cores.

    - ``n_threads = 0``: a ValueError is raised.

    ``only_physical_cores=False`` allows extra threads for SMT/HyperThreading
    logical cores. Empirically, using as many threads as available SMT cores
    can slightly improve performance in some cases but severely degrade it in
    others, so keeping ``only_physical_cores=True`` is recommended unless the
    impact of SMT has been studied case by case (with various input data
    shapes, small ones in particular).

    When the build has no OpenMP support, the result is always 1.
    """
    # Reject the one invalid request first, even on builds without OpenMP.
    if n_threads == 0:
        raise ValueError("n_threads = 0 is invalid")

    # OpenMP disabled at build time: everything runs sequentially.
    if not SKLEARN_OPENMP_PARALLELISM_ENABLED:
        return 1

    if os.getenv("OMP_NUM_THREADS"):
        # The user asked for an explicit thread count: honour it, even when
        # it exceeds the number of cpus.
        thread_cap = omp_get_max_threads()
    else:
        # Query the cpu count only once per value of only_physical_cores and
        # serve later calls from the module-level cache.
        if only_physical_cores not in _CPU_COUNTS:
            _CPU_COUNTS[only_physical_cores] = cpu_count(
                only_physical_cores=only_physical_cores
            )
        thread_cap = min(omp_get_max_threads(), _CPU_COUNTS[only_physical_cores])

    if n_threads is None:
        return thread_cap

    if n_threads < 0:
        # -1 maps to thread_cap, -2 to thread_cap - 1, ...; floor at 1.
        return max(1, thread_cap + n_threads + 1)

    # Positive explicit request: pass it through unchanged.
    return n_threads
|
|