File size: 8,963 Bytes
838f737 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
from collections import OrderedDict
from typing import Any
import torch
from ._utils import _device_t, _get_device_index
__all__ = [
"empty_cache",
"max_memory_allocated",
"max_memory_reserved",
"memory_allocated",
"memory_reserved",
"memory_stats",
"reset_accumulated_memory_stats",
"reset_peak_memory_stats",
]
def empty_cache() -> None:
r"""Release all unoccupied cached memory currently held by the caching
allocator so that those can be used in other application.
.. note:: This function is a no-op if the memory allocator for the current
:ref:`accelerator <accelerators>` has not been initialized.
"""
if not torch._C._accelerator_isAllocatorInitialized():
return
torch._C._accelerator_emptyCache()
def memory_stats(device_index: _device_t = None, /) -> OrderedDict[str, Any]:
r"""Return a dictionary of accelerator device memory allocator statistics for a given device index.
The return value of this function is a dictionary of statistics, each of
which is a non-negative integer.
Core statistics:
- ``"allocated.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
number of allocation requests received by the memory allocator.
- ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
amount of allocated memory.
- ``"segment.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
number of reserved segments from device memory allocation.
- ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
amount of reserved memory.
- ``"active.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
number of active memory blocks.
- ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
amount of active memory.
- ``"inactive_split.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
number of inactive, non-releasable memory blocks.
- ``"inactive_split_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
amount of inactive, non-releasable memory.
For these core statistics, values are broken down as follows.
Pool type:
- ``all``: combined statistics across all memory pools.
- ``large_pool``: statistics for the large allocation pool
(as of June 2025, for size >= 1MB allocations).
- ``small_pool``: statistics for the small allocation pool
(as of June 2025, for size < 1MB allocations).
Metric type:
- ``current``: current value of this metric.
- ``peak``: maximum value of this metric.
- ``allocated``: historical total increase in this metric.
- ``freed``: historical total decrease in this metric.
In addition to the core statistics, we also provide some simple event
counters:
- ``"num_alloc_retries"``: number of failed device memory allocation calls that
result in a cache flush and retry.
- ``"num_ooms"``: number of out-of-memory errors thrown.
- ``"num_sync_all_streams"``: number of ``synchronize_and_free_events`` calls.
- ``"num_device_alloc"``: number of device memory allocation calls.
- ``"num_device_free"``: number of device memory free calls.
Args:
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
If not given, use :func:`torch.accelerator.current_device_index` by default.
If a :class:`torch.device` or str is provided, its type must match the current
:ref:`accelerator<accelerators>` device type.
"""
if not torch._C._accelerator_isAllocatorInitialized():
return OrderedDict()
device_index = _get_device_index(device_index, optional=True)
stats = torch._C._accelerator_getDeviceStats(device_index)
flat_stats = []
def flatten(prefix: str, value: Any) -> None:
if isinstance(value, dict):
for k, v in value.items():
nested_prefix = f"{prefix}.{k}" if prefix else k
flatten(nested_prefix, v)
else:
flat_stats.append((prefix, value))
flatten("", stats)
flat_stats.sort()
return OrderedDict(flat_stats)
def memory_allocated(device_index: _device_t = None, /) -> int:
    r"""Return the current :ref:`accelerator<accelerators>` device memory occupied by tensors
    in bytes for a given device index.

    Args:
        device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
            If not given, use :func:`torch.accelerator.current_device_index` by default.
            If a :class:`torch.device` or str is provided, its type must match the current
            :ref:`accelerator<accelerators>` device type.
    """
    # Missing key (e.g. allocator never initialized) reads as zero bytes.
    stats = memory_stats(device_index)
    return stats.get("allocated_bytes.all.current", 0)
def max_memory_allocated(device_index: _device_t = None, /) -> int:
    r"""Return the current :ref:`accelerator<accelerators>` maximum device memory occupied by tensors
    in bytes for a given device index.

    By default, this returns the peak allocated memory since the beginning of
    this program. :func:`~torch.accelerator.reset_peak_memory_stats` can be used to
    reset the starting point in tracking this metric.

    Args:
        device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
            If not given, use :func:`torch.accelerator.current_device_index` by default.
            If a :class:`torch.device` or str is provided, its type must match the current
            :ref:`accelerator<accelerators>` device type.
    """
    # Missing key (e.g. allocator never initialized) reads as zero bytes.
    stats = memory_stats(device_index)
    return stats.get("allocated_bytes.all.peak", 0)
def memory_reserved(device_index: _device_t = None, /) -> int:
    r"""Return the current :ref:`accelerator<accelerators>` device memory managed by the caching allocator
    in bytes for a given device index.

    Args:
        device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
            If not given, use :func:`torch.accelerator.current_device_index` by default.
            If a :class:`torch.device` or str is provided, its type must match the current
            :ref:`accelerator<accelerators>` device type.
    """
    # Missing key (e.g. allocator never initialized) reads as zero bytes.
    stats = memory_stats(device_index)
    return stats.get("reserved_bytes.all.current", 0)
def max_memory_reserved(device_index: _device_t = None, /) -> int:
    r"""Return the current :ref:`accelerator<accelerators>` maximum device memory managed by the caching allocator
    in bytes for a given device index.

    By default, this returns the peak cached memory since the beginning of this
    program. :func:`~torch.accelerator.reset_peak_memory_stats` can be used to reset
    the starting point in tracking this metric.

    Args:
        device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
            If not given, use :func:`torch.accelerator.current_device_index` by default.
            If a :class:`torch.device` or str is provided, its type must match the current
            :ref:`accelerator<accelerators>` device type.
    """
    # Missing key (e.g. allocator never initialized) reads as zero bytes.
    stats = memory_stats(device_index)
    return stats.get("reserved_bytes.all.peak", 0)
def reset_accumulated_memory_stats(device_index: _device_t = None, /) -> None:
r"""Reset the "accumulated" (historical) stats tracked by the current :ref:`accelerator<accelerators>`
memory allocator for a given device index.
Args:
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
If not given, use :func:`torch.accelerator.current_device_index` by default.
If a :class:`torch.device` or str is provided, its type must match the current
:ref:`accelerator<accelerators>` device type.
.. note:: This function is a no-op if the memory allocator for the current
:ref:`accelerator <accelerators>` has not been initialized.
"""
device_index = _get_device_index(device_index, optional=True)
return torch._C._accelerator_resetAccumulatedStats(device_index)
def reset_peak_memory_stats(device_index: _device_t = None, /) -> None:
r"""Reset the "peak" stats tracked by the current :ref:`accelerator<accelerators>`
memory allocator for a given device index.
Args:
device_index (:class:`torch.device`, str, int, optional): the index of the device to target.
If not given, use :func:`torch.accelerator.current_device_index` by default.
If a :class:`torch.device` or str is provided, its type must match the current
:ref:`accelerator<accelerators>` device type.
.. note:: This function is a no-op if the memory allocator for the current
:ref:`accelerator <accelerators>` has not been initialized.
"""
device_index = _get_device_index(device_index, optional=True)
return torch._C._accelerator_resetPeakStats(device_index)
|