cranky-coder08 committed
Commit 59f1501 · verified · 1 parent: 44823a3

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full set.
Files changed (50)
  1. .gitattributes +3 -0
  2. phivenv/Lib/site-packages/torch/_C/_VariableFunctions.pyi +0 -0
  3. phivenv/Lib/site-packages/torch/_C/__init__.pyi +0 -0
  4. phivenv/Lib/site-packages/torch/_C/_aoti.pyi +164 -0
  5. phivenv/Lib/site-packages/torch/_C/_autograd.pyi +141 -0
  6. phivenv/Lib/site-packages/torch/_C/_cpu.pyi +13 -0
  7. phivenv/Lib/site-packages/torch/_C/_cudnn.pyi +14 -0
  8. phivenv/Lib/site-packages/torch/_C/_cusparselt.pyi +1 -0
  9. phivenv/Lib/site-packages/torch/_C/_distributed_autograd.pyi +26 -0
  10. phivenv/Lib/site-packages/torch/_C/_distributed_c10d.pyi +797 -0
  11. phivenv/Lib/site-packages/torch/_C/_distributed_rpc.pyi +188 -0
  12. phivenv/Lib/site-packages/torch/_C/_distributed_rpc_testing.pyi +32 -0
  13. phivenv/Lib/site-packages/torch/_C/_dynamo/__init__.pyi +4 -0
  14. phivenv/Lib/site-packages/torch/_C/_dynamo/compiled_autograd.pyi +13 -0
  15. phivenv/Lib/site-packages/torch/_C/_dynamo/eval_frame.pyi +71 -0
  16. phivenv/Lib/site-packages/torch/_C/_dynamo/guards.pyi +191 -0
  17. phivenv/Lib/site-packages/torch/_C/_export/__init__.pyi +9 -0
  18. phivenv/Lib/site-packages/torch/_C/_export/pt2_archive_constants.pyi +22 -0
  19. phivenv/Lib/site-packages/torch/_C/_functions.pyi +19 -0
  20. phivenv/Lib/site-packages/torch/_C/_functorch.pyi +86 -0
  21. phivenv/Lib/site-packages/torch/_C/_instruction_counter.pyi +4 -0
  22. phivenv/Lib/site-packages/torch/_C/_itt.pyi +5 -0
  23. phivenv/Lib/site-packages/torch/_C/_lazy.pyi +26 -0
  24. phivenv/Lib/site-packages/torch/_C/_lazy_ts_backend.pyi +12 -0
  25. phivenv/Lib/site-packages/torch/_C/_monitor.pyi +58 -0
  26. phivenv/Lib/site-packages/torch/_C/_nn.pyi +175 -0
  27. phivenv/Lib/site-packages/torch/_C/_nvtx.pyi +9 -0
  28. phivenv/Lib/site-packages/torch/_C/_onnx.pyi +39 -0
  29. phivenv/Lib/site-packages/torch/_C/_profiler.pyi +246 -0
  30. phivenv/Lib/site-packages/torch/_C/_verbose.pyi +3 -0
  31. phivenv/Lib/site-packages/torch/_C_flatbuffer/__init__.pyi +11 -0
  32. phivenv/Lib/site-packages/torch/_awaits/__init__.py +53 -0
  33. phivenv/Lib/site-packages/torch/_awaits/__pycache__/__init__.cpython-39.pyc +0 -0
  34. phivenv/Lib/site-packages/torch/_custom_op/__init__.py +0 -0
  35. phivenv/Lib/site-packages/torch/_custom_op/__pycache__/__init__.cpython-39.pyc +0 -0
  36. phivenv/Lib/site-packages/torch/_custom_op/__pycache__/autograd.cpython-39.pyc +0 -0
  37. phivenv/Lib/site-packages/torch/_custom_op/__pycache__/impl.cpython-39.pyc +0 -0
  38. phivenv/Lib/site-packages/torch/_custom_op/autograd.py +307 -0
  39. phivenv/Lib/site-packages/torch/_custom_op/impl.py +715 -0
  40. phivenv/Lib/site-packages/torch/_decomp/__init__.py +544 -0
  41. phivenv/Lib/site-packages/torch/_decomp/__pycache__/__init__.cpython-39.pyc +0 -0
  42. phivenv/Lib/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-39.pyc +0 -0
  43. phivenv/Lib/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-39.pyc +0 -0
  44. phivenv/Lib/site-packages/torch/_decomp/decompositions.py +0 -0
  45. phivenv/Lib/site-packages/torch/_decomp/decompositions_for_jvp.py +335 -0
  46. phivenv/Lib/site-packages/torch/_decomp/decompositions_for_rng.py +266 -0
  47. phivenv/Lib/site-packages/torch/_dispatch/__init__.py +0 -0
  48. phivenv/Lib/site-packages/torch/_dispatch/__pycache__/__init__.cpython-39.pyc +0 -0
  49. phivenv/Lib/site-packages/torch/_dispatch/__pycache__/python.cpython-39.pyc +0 -0
  50. phivenv/Lib/site-packages/torch/_dispatch/python.py +192 -0
.gitattributes CHANGED
@@ -112,3 +112,6 @@ phivenv/Lib/site-packages/torch/lib/cpuinfo.lib filter=lfs diff=lfs merge=lfs -text
  phivenv/Lib/site-packages/torch/lib/fbgemm.dll filter=lfs diff=lfs merge=lfs -text
  phivenv/Lib/site-packages/torch/lib/c10.dll filter=lfs diff=lfs merge=lfs -text
  phivenv/Lib/site-packages/torch/lib/fbgemm.lib filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/torch/lib/fmt.lib filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/torch/lib/libiomp5md.dll filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/torch/lib/libittnotify.lib filter=lfs diff=lfs merge=lfs -text
phivenv/Lib/site-packages/torch/_C/_VariableFunctions.pyi ADDED
The diff for this file is too large to render. See raw diff
 
phivenv/Lib/site-packages/torch/_C/__init__.pyi ADDED
The diff for this file is too large to render. See raw diff
 
phivenv/Lib/site-packages/torch/_C/_aoti.pyi ADDED
@@ -0,0 +1,164 @@
+ from ctypes import c_void_p
+ from typing import overload, Protocol
+
+ from torch import Tensor
+
+ # Defined in torch/csrc/inductor/aoti_runner/pybind.cpp
+
+ # Tensor to AtenTensorHandle
+ def unsafe_alloc_void_ptrs_from_tensors(tensors: list[Tensor]) -> list[c_void_p]: ...
+ def unsafe_alloc_void_ptr_from_tensor(tensor: Tensor) -> c_void_p: ...
+
+ # AtenTensorHandle to Tensor
+ def alloc_tensors_by_stealing_from_void_ptrs(
+     handles: list[c_void_p],
+ ) -> list[Tensor]: ...
+ def alloc_tensor_by_stealing_from_void_ptr(
+     handle: c_void_p,
+ ) -> Tensor: ...
+
+ class AOTIModelContainerRunner(Protocol):
+     def run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def get_call_spec(self) -> list[str]: ...
+     def get_constant_names_to_original_fqns(self) -> dict[str, str]: ...
+     def get_constant_names_to_dtypes(self) -> dict[str, int]: ...
+     def extract_constants_map(self, use_inactive: bool) -> dict[str, Tensor]: ...
+     def update_constant_buffer(
+         self,
+         tensor_map: dict[str, Tensor],
+         use_inactive: bool,
+         validate_full_updates: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
+     def swap_constant_buffer(self) -> None: ...
+     def free_inactive_constant_buffer(self) -> None: ...
+
+ class AOTIModelContainerRunnerCpu:
+     def __init__(self, model_so_path: str, num_models: int) -> None: ...
+     def run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def get_call_spec(self) -> list[str]: ...
+     def get_constant_names_to_original_fqns(self) -> dict[str, str]: ...
+     def get_constant_names_to_dtypes(self) -> dict[str, int]: ...
+     def extract_constants_map(self, use_inactive: bool) -> dict[str, Tensor]: ...
+     def update_constant_buffer(
+         self,
+         tensor_map: dict[str, Tensor],
+         use_inactive: bool,
+         validate_full_updates: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
+     def swap_constant_buffer(self) -> None: ...
+     def free_inactive_constant_buffer(self) -> None: ...
+
+ class AOTIModelContainerRunnerCuda:
+     @overload
+     def __init__(self, model_so_path: str, num_models: int) -> None: ...
+     @overload
+     def __init__(
+         self, model_so_path: str, num_models: int, device_str: str
+     ) -> None: ...
+     @overload
+     def __init__(
+         self, model_so_path: str, num_models: int, device_str: str, cubin_dir: str
+     ) -> None: ...
+     def run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def get_call_spec(self) -> list[str]: ...
+     def get_constant_names_to_original_fqns(self) -> dict[str, str]: ...
+     def get_constant_names_to_dtypes(self) -> dict[str, int]: ...
+     def extract_constants_map(self, use_inactive: bool) -> dict[str, Tensor]: ...
+     def update_constant_buffer(
+         self,
+         tensor_map: dict[str, Tensor],
+         use_inactive: bool,
+         validate_full_updates: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
+     def swap_constant_buffer(self) -> None: ...
+     def free_inactive_constant_buffer(self) -> None: ...
+
+ class AOTIModelContainerRunnerXpu:
+     @overload
+     def __init__(self, model_so_path: str, num_models: int) -> None: ...
+     @overload
+     def __init__(
+         self, model_so_path: str, num_models: int, device_str: str
+     ) -> None: ...
+     @overload
+     def __init__(
+         self, model_so_path: str, num_models: int, device_str: str, kernel_bin_dir: str
+     ) -> None: ...
+     def run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def get_call_spec(self) -> list[str]: ...
+     def get_constant_names_to_original_fqns(self) -> dict[str, str]: ...
+     def get_constant_names_to_dtypes(self) -> dict[str, int]: ...
+     def extract_constants_map(self, use_inactive: bool) -> dict[str, Tensor]: ...
+     def update_constant_buffer(
+         self,
+         tensor_map: dict[str, Tensor],
+         use_inactive: bool,
+         validate_full_updates: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
+     def swap_constant_buffer(self) -> None: ...
+     def free_inactive_constant_buffer(self) -> None: ...
+
+ class AOTIModelContainerRunnerMps:
+     def __init__(self, model_so_path: str, num_models: int) -> None: ...
+     def run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def get_call_spec(self) -> list[str]: ...
+     def get_constant_names_to_original_fqns(self) -> dict[str, str]: ...
+     def get_constant_names_to_dtypes(self) -> dict[str, int]: ...
+     def extract_constants_map(self, use_inactive: bool) -> dict[str, Tensor]: ...
+     def update_constant_buffer(
+         self,
+         tensor_map: dict[str, Tensor],
+         use_inactive: bool,
+         validate_full_updates: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
+     def swap_constant_buffer(self) -> None: ...
+     def free_inactive_constant_buffer(self) -> None: ...
+
+ # Defined in torch/csrc/inductor/aoti_package/pybind.cpp
+ class AOTIModelPackageLoader:
+     def __init__(
+         self,
+         model_package_path: str,
+         model_name: str,
+         run_single_threaded: bool,
+         num_runners: int,
+         device_index: int,
+     ) -> None: ...
+     def get_metadata(self) -> dict[str, str]: ...
+     def run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def boxed_run(
+         self, inputs: list[Tensor], stream_handle: c_void_p = ...
+     ) -> list[Tensor]: ...
+     def get_call_spec(self) -> list[str]: ...
+     def get_constant_fqns(self) -> list[str]: ...
+     def load_constants(
+         self,
+         constants_map: dict[str, Tensor],
+         use_inactive: bool,
+         check_full_update: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
+     def update_constant_buffer(
+         self,
+         tensor_map: dict[str, Tensor],
+         use_inactive: bool,
+         validate_full_updates: bool,
+         user_managed: bool = ...,
+     ) -> None: ...
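The stub above declares the AOTInductor runner bindings. As a minimal sketch of how the package loader declared here might be driven (the package path, model name, and input shape below are hypothetical; real code would normally go through the public torch._inductor.aoti_compile_and_package / aoti_load_package wrappers):

import torch
from torch._C._aoti import AOTIModelPackageLoader

# Hypothetical .pt2 package produced earlier by torch._inductor.aoti_compile_and_package.
loader = AOTIModelPackageLoader("model.pt2", "model", False, 1, 0)
print(loader.get_metadata())               # build metadata recorded in the package
outputs = loader.run([torch.randn(4, 8)])  # stream_handle left at its default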
phivenv/Lib/site-packages/torch/_C/_autograd.pyi ADDED
@@ -0,0 +1,141 @@
+ # mypy: allow-untyped-defs
+ from enum import Enum
+ from typing import Any, Callable
+
+ import torch
+ from torch._C._profiler import (
+     _ProfilerEvent,
+     ActiveProfilerType,
+     ProfilerActivity,
+     ProfilerConfig,
+ )
+
+ # Defined in torch/csrc/autograd/init.cpp
+
+ class DeviceType(Enum):
+     CPU = ...
+     CUDA = ...
+     XPU = ...
+     MKLDNN = ...
+     OPENGL = ...
+     OPENCL = ...
+     IDEEP = ...
+     HIP = ...
+     FPGA = ...
+     MAIA = ...
+     XLA = ...
+     MTIA = ...
+     MPS = ...
+     HPU = ...
+     Meta = ...
+     Vulkan = ...
+     Metal = ...
+     PrivateUse1 = ...
+
+ class ProfilerEvent:
+     def cpu_elapsed_us(self, other: ProfilerEvent) -> float: ...
+     def cpu_memory_usage(self) -> int: ...
+     def cuda_elapsed_us(self, other: ProfilerEvent) -> float: ...
+     def privateuse1_elapsed_us(self, other: ProfilerEvent) -> float: ...
+     def cuda_memory_usage(self) -> int: ...
+     def device(self) -> int: ...
+     def handle(self) -> int: ...
+     def has_cuda(self) -> bool: ...
+     def is_remote(self) -> bool: ...
+     def kind(self) -> int: ...
+     def name(self) -> str: ...
+     def node_id(self) -> int: ...
+     def sequence_nr(self) -> int: ...
+     def shapes(self) -> list[list[int]]: ...
+     def thread_id(self) -> int: ...
+     def flops(self) -> float: ...
+     def is_async(self) -> bool: ...
+
+ class _KinetoEvent:
+     def name(self) -> str: ...
+     def overload_name(self) -> str: ...
+     def device_index(self) -> int: ...
+     def device_resource_id(self) -> int: ...
+     def start_ns(self) -> int: ...
+     def end_ns(self) -> int: ...
+     def duration_ns(self) -> int: ...
+     def is_async(self) -> bool: ...
+     def linked_correlation_id(self) -> int: ...
+     def shapes(self) -> list[list[int]]: ...
+     def dtypes(self) -> list[str]: ...
+     def concrete_inputs(self) -> list[Any]: ...
+     def kwinputs(self) -> dict[str, Any]: ...
+     def device_type(self) -> DeviceType: ...
+     def start_thread_id(self) -> int: ...
+     def end_thread_id(self) -> int: ...
+     def correlation_id(self) -> int: ...
+     def fwd_thread_id(self) -> int: ...
+     def stack(self) -> list[str]: ...
+     def scope(self) -> int: ...
+     def sequence_nr(self) -> int: ...
+     def flops(self) -> int: ...
+     def cuda_elapsed_us(self) -> int: ...
+     def privateuse1_elapsed_us(self) -> int: ...
+     def is_user_annotation(self) -> bool: ...
+
+ class _ProfilerResult:
+     def events(self) -> list[_KinetoEvent]: ...
+     def legacy_events(self) -> list[list[ProfilerEvent]]: ...
+     def save(self, path: str) -> None: ...
+     def experimental_event_tree(self) -> list[_ProfilerEvent]: ...
+     def trace_start_ns(self) -> int: ...
+
+ class SavedTensor: ...
+
+ def _enable_profiler(
+     config: ProfilerConfig,
+     activities: set[ProfilerActivity],
+ ) -> None: ...
+ def _prepare_profiler(
+     config: ProfilerConfig,
+     activities: set[ProfilerActivity],
+ ) -> None: ...
+ def _toggle_collection_dynamic(
+     enable: bool,
+     activities: set[ProfilerActivity],
+ ) -> None: ...
+ def _disable_profiler() -> _ProfilerResult: ...
+ def _profiler_enabled() -> bool: ...
+ def _add_metadata_json(key: str, value: str) -> None: ...
+ def _kineto_step() -> None: ...
+ def _get_current_graph_task_keep_graph() -> bool: ...
+ def _get_sequence_nr() -> int: ...
+ def kineto_available() -> bool: ...
+ def _record_function_with_args_enter(name: str, *args) -> torch.Tensor: ...
+ def _record_function_with_args_exit(handle: torch.Tensor) -> None: ...
+ def _supported_activities() -> set[ProfilerActivity]: ...
+ def _enable_record_function(enable: bool) -> None: ...
+ def _set_empty_test_observer(is_global: bool, sampling_prob: float) -> None: ...
+ def _push_saved_tensors_default_hooks(
+     pack_hook: Callable[[torch.Tensor], Any],
+     unpack_hook: Callable[[Any], torch.Tensor],
+ ) -> None: ...
+ def _pop_saved_tensors_default_hooks() -> None: ...
+ def _top_saved_tensors_default_hooks(
+     ignore_is_tracing: bool,
+ ) -> tuple[Callable[[torch.Tensor], Any], Callable[[Any], torch.Tensor]]: ...
+ def _unsafe_set_version_counter(
+     t: tuple[torch.Tensor, ...], prev_version: tuple[int, ...]
+ ) -> None: ...
+ def _enable_profiler_legacy(config: ProfilerConfig) -> None: ...
+ def _disable_profiler_legacy() -> list[list[ProfilerEvent]]: ...
+ def _profiler_type() -> ActiveProfilerType: ...
+ def _saved_tensors_hooks_enable() -> None: ...
+ def _saved_tensors_hooks_disable(message: str, fail_if_non_empty=True) -> None: ...
+ def _saved_tensors_hooks_get_disabled_error_message() -> str | None: ...
+ def _saved_tensors_hooks_set_tracing(is_tracing: bool) -> bool: ...
+
+ class CreationMeta(Enum):
+     DEFAULT = ...
+     IN_CUSTOM_FUNCTION = ...
+     MULTI_OUTPUT_NODE = ...
+     NO_GRAD_MODE = ...
+     INFERENCE_MODE = ...
+
+ def _set_creation_meta(t: torch.Tensor, creation_meta: CreationMeta) -> None: ...
+ def _get_creation_meta(t: torch.Tensor) -> CreationMeta: ...
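A minimal sketch of the capability queries declared above; the stateful bindings here (e.g. _enable_profiler/_disable_profiler) are normally driven indirectly through the public torch.profiler API rather than called directly:

from torch._C._autograd import _supported_activities, kineto_available

print(kineto_available())        # True when torch was built with the Kineto backend
print(_supported_activities())   # e.g. {ProfilerActivity.CPU, ProfilerActivity.CUDA}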
phivenv/Lib/site-packages/torch/_C/_cpu.pyi ADDED
@@ -0,0 +1,13 @@
+ from torch.types import _bool, _int
+
+ # Defined in torch/csrc/cpu/Module.cpp
+
+ def _is_avx2_supported() -> _bool: ...
+ def _is_avx512_supported() -> _bool: ...
+ def _is_avx512_vnni_supported() -> _bool: ...
+ def _is_avx512_bf16_supported() -> _bool: ...
+ def _is_amx_tile_supported() -> _bool: ...
+ def _is_amx_fp16_supported() -> _bool: ...
+ def _init_amx() -> _bool: ...
+ def _L1d_cache_size() -> _int: ...
+ def _L2_cache_size() -> _int: ...
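A minimal sketch of querying these CPU-capability helpers (similar information is exposed publicly via torch.backends.cpu):

from torch._C import _cpu

# Feature probes return plain bools at runtime (_bool is just an alias).
if _cpu._is_avx512_supported():
    print("AVX-512 available; L2 cache size:", _cpu._L2_cache_size())
elif _cpu._is_avx2_supported():
    print("AVX2 available")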
phivenv/Lib/site-packages/torch/_C/_cudnn.pyi ADDED
@@ -0,0 +1,14 @@
+ from enum import IntEnum
+
+ # Defined in torch/csrc/cuda/shared/cudnn.cpp
+ is_cuda: bool
+
+ def getRuntimeVersion() -> tuple[int, int, int]: ...
+ def getCompileVersion() -> tuple[int, int, int]: ...
+ def getVersionInt() -> int: ...
+
+ class RNNMode(IntEnum):
+     rnn_relu = ...
+     rnn_tanh = ...
+     lstm = ...
+     gru = ...
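A minimal sketch of the version helpers declared above; note that torch._C._cudnn is only importable in CUDA-enabled builds of torch:

from torch._C import _cudnn

print(_cudnn.getCompileVersion())   # cuDNN version torch was compiled against
print(_cudnn.getRuntimeVersion())   # cuDNN version loaded at runtime
print(_cudnn.RNNMode.lstm)          # IntEnum member used by the RNN bindings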
phivenv/Lib/site-packages/torch/_C/_cusparselt.pyi ADDED
@@ -0,0 +1 @@
+ def getVersionInt() -> int: ...
phivenv/Lib/site-packages/torch/_C/_distributed_autograd.pyi ADDED
@@ -0,0 +1,26 @@
+ from typing import Any
+
+ import torch
+
+ # This module is defined in torch/csrc/distributed/autograd/init.cpp
+
+ class DistAutogradContext:
+     def _context_id(self) -> int: ...
+     def _recv_functions(self) -> dict[int, Any]: ...
+     def _send_functions(self) -> dict[int, Any]: ...
+     def _known_worker_ids(self) -> set[int]: ...
+
+ def _new_context() -> DistAutogradContext: ...
+ def _release_context(context_id: int) -> None: ...
+ def _get_max_id() -> int: ...
+ def _is_valid_context(worker_id: int) -> bool: ...
+ def _retrieve_context(context_id: int) -> DistAutogradContext: ...
+ def _current_context() -> DistAutogradContext: ...
+ def _init(worker_id: int) -> None: ...
+ def _get_debug_info() -> dict[str, str]: ...
+ def backward(
+     context_id: int,
+     roots: list[torch.Tensor],
+     retain_graph: bool = False,
+ ) -> None: ...
+ def get_gradients(context_id: int) -> dict[torch.Tensor, torch.Tensor]: ...
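These bindings back the public torch.distributed.autograd package. A minimal sketch of the intended flow, assuming torch.distributed.rpc.init_rpc(...) has already run in this process:

import torch
import torch.distributed.autograd as dist_autograd

with dist_autograd.context() as context_id:   # wraps _new_context/_release_context
    loss = torch.ones(2, 2, requires_grad=True).sum()
    dist_autograd.backward(context_id, [loss])        # the backward() declared above
    grads = dist_autograd.get_gradients(context_id)   # dict[Tensor, Tensor]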
phivenv/Lib/site-packages/torch/_C/_distributed_c10d.pyi ADDED
@@ -0,0 +1,797 @@
+ # mypy: allow-untyped-defs
+ # mypy: disable-error-code="type-arg"
+ from datetime import timedelta
+ from enum import Enum
+ from typing import Any, Optional, overload, Union
+
+ import torch
+ from torch import Tensor
+ from torch._C import ScriptObject
+ from torch._C._autograd import DeviceType
+ from torch.futures import Future
+
+ # This module is defined in torch/csrc/distributed/c10d/init.cpp
+
+ _DEFAULT_FIRST_BUCKET_BYTES: int
+ _DEFAULT_NO_TIMEOUT: timedelta
+ _DEFAULT_PG_TIMEOUT: timedelta
+ _DEFAULT_PG_NCCL_TIMEOUT: timedelta
+
+ class BuiltinCommHookType(Enum):
+     ALLREDUCE = ...
+     FP16_COMPRESS = ...
+
+ def _register_comm_hook(reducer: Reducer, state: Any, comm_hook: Any): ...
+ def _register_builtin_comm_hook(
+     reducer: Reducer,
+     comm_hook_type: BuiltinCommHookType,
+ ): ...
+ def _set_global_rank(rank: int) -> None: ...
+ def _hash_tensors(tensors: list[Tensor]) -> int: ...
+
+ class GradBucket:
+     def index(self) -> int: ...
+     def buffer(self) -> Tensor: ...
+     def gradients(self) -> list[Tensor]: ...
+     def is_last(self) -> bool: ...
+     def set_buffer(self, tensor: Tensor) -> None: ...
+     def parameters(self) -> list[Tensor]: ...
+
+ class Reducer:
+     def __init__(
+         self,
+         params: list[Tensor],
+         bucket_indices: list[list[int]],
+         per_bucket_size_limits: list[int],
+         process_group: ProcessGroup,
+         expect_sparse_gradients: list[bool] = ...,
+         bucket_bytes_cap: int = ...,  # kDefaultBucketBytesCap in reducer.hpp
+         find_unused_parameters: bool = ...,
+         gradient_as_bucket_view: bool = ...,
+         param_to_name_mapping: dict[int, str] = ...,
+         first_bucket_types_cap: int = ...,  # kDefaultFirstBucketBytes in reducer.hpp
+         skip_all_reduce_unused_params: bool = ...,
+         use_python_reducer: bool = ...,
+     ) -> None: ...
+     def prepare_for_forward(self) -> None: ...
+     def prepare_for_backward(self, output: list[Tensor]) -> None: ...
+     def get_backward_stats(self) -> list[int]: ...
+     def _install_post_backward_futures(self, futures: list[Future]) -> None: ...
+     def _rebuild_buckets(self) -> bool: ...
+     def _get_zeros_like_grad_buckets(self) -> list[GradBucket]: ...
+     def _push_all_rebuilt_params(self) -> None: ...
+     def _set_forward_pass_work_handle(
+         self,
+         work: Work,
+         use_static_world_size: bool,
+     ): ...
+     def _get_local_used_map(self) -> Tensor: ...
+     def _set_ddp_runtime_logging_sample_rate(self, sample_rate: int) -> None: ...
+     def _set_static_graph(self) -> None: ...
+     def _run_comm_hook(self, bucket: GradBucket) -> Future: ...
+     def set_logger(self, logger: Logger) -> None: ...
+     def _remove_autograd_hooks(self) -> None: ...
+     def _check_reducer_finalized(self) -> None: ...
+     def _set_sparse_metadata(self, global_unique_ids: dict[str, Tensor]) -> None: ...
+     def _reset_state(self) -> None: ...
+     def _update_process_group(self, new_process_group: ProcessGroup) -> None: ...
+
+ class DDPLoggingData:
+     strs_map: dict[str, str]
+     ints_map: dict[str, int]
+
+ class Logger:
+     def __init__(self, reducer: Reducer) -> None: ...
+     def set_construction_data_and_log(
+         self,
+         module_name: str,
+         device_ids: list[int],
+         output_device: int,
+         broadcast_buffers: bool,
+         has_sync_bn: bool,
+         static_graph: bool,
+     ): ...
+     def set_runtime_stats_and_log(self) -> None: ...
+     def set_error_and_log(self, error: str) -> None: ...
+     def _get_ddp_logging_data(self) -> DDPLoggingData: ...
+     def _set_comm_hook_name(self, comm_hook: str) -> None: ...
+     def _set_uneven_input_join(self) -> None: ...
+     def _set_static_graph(self) -> None: ...
+
+ class _WorkerServer:
+     def __init__(self, socket_path: str) -> None: ...
+     def shutdown(self) -> None: ...
+
+ def get_debug_level(): ...
+ def set_debug_level(): ...
+ def set_debug_level_from_env(): ...
+
+ class DebugLevel(Enum):
+     OFF = ...
+     INFO = ...
+     DETAIL = ...
+
+ class ReduceOp:
+     def __init__(self, op: RedOpType) -> None: ...
+
+     SUM: RedOpType = ...
+     AVG: RedOpType = ...
+     PRODUCT: RedOpType = ...
+     MIN: RedOpType = ...
+     MAX: RedOpType = ...
+     BAND: RedOpType = ...
+     BOR: RedOpType = ...
+     BXOR: RedOpType = ...
+     PREMUL_SUM: RedOpType = ...
+     UNUSED: RedOpType = ...
+
+     # mypy error being ignored:
+     # Detected enum "torch._C._distributed_c10d.ReduceOp.RedOpType" in a type
+     # stub with zero members. There is a chance this is due to a recent change
+     # in the semantics of enum membership. If so, use `member = value` to mark
+     # an enum member, instead of `member: type`
+     class RedOpType(Enum): ...  # type: ignore[misc]
+
+ class BroadcastOptions:
+     rootRank: int
+     rootTensor: int
+     timeout: timedelta
+     asyncOp: bool
+
+ class AllreduceOptions:
+     reduceOp: ReduceOp
+     timeout: timedelta
+     asyncOp: bool
+     sparseIndices: Optional[Tensor]
+
+ class AllreduceCoalescedOptions(AllreduceOptions): ...
+
+ class ReduceOptions:
+     reduceOp: ReduceOp
+     rootRank: int
+     rootTensor: int
+     timeout: timedelta
+     asyncOp: bool
+
+ class AllgatherOptions:
+     timeout: timedelta
+     asyncOp: bool
+
+ class GatherOptions:
+     rootRank: int
+     timeout: timedelta
+     asyncOp: bool
+
+ class ScatterOptions:
+     rootRank: int
+     timeout: timedelta
+     asyncOp: bool
+
+ class ReduceScatterOptions:
+     reduceOp: ReduceOp
+     timeout: timedelta
+     asyncOp: bool
+
+ class BarrierOptions:
+     device_ids: list[int]
+     device: torch.device
+     timeout: timedelta
+     asyncOp: bool
+
+ class AllToAllOptions:
+     timeout: timedelta
+     asyncOp: bool
+
+ class Store:
+     def set(self, key: str, value: str): ...
+     def get(self, key: str) -> bytes: ...
+     def add(self, key: str, value: int) -> int: ...
+     def check(self, keys: list[str]) -> bool: ...
+     def compare_set(
+         self,
+         key: str,
+         expected_value: str,
+         desired_value: str,
+     ) -> bytes: ...
+     def delete_key(self, key: str) -> bool: ...
+     def num_keys(self) -> int: ...
+     def set_timeout(self, timeout: timedelta): ...
+     @overload
+     def wait(self, keys: list[str]): ...
+     @overload
+     def wait(self, keys: list[str], timeout: timedelta): ...
+     def queue_pop(self, key: str, block: bool = True) -> bytes: ...
+     def queue_push(self, key: str, value: Union[bytes, str]) -> None: ...
+     def queue_len(self, key: str) -> int: ...
+
+ class FileStore(Store):
+     def __init__(self, path: str, numWorkers: int = ...) -> None: ...
+
+ class HashStore(Store):
+     def __init__(self) -> None: ...
+
+ class TCPStore(Store):
+     def __init__(
+         self,
+         host_name: str,
+         port: int,
+         world_size: int | None = ...,
+         is_master: bool = ...,
+         timeout: timedelta = ...,
+         wait_for_workers: bool = ...,
+         multi_tenant: bool = ...,
+         master_listen_fd: int | None = ...,
+         use_libuv: bool | None = ...,
+     ) -> None: ...
+     @property
+     def host(self) -> str: ...
+     @property
+     def port(self) -> int: ...
+
+ class PrefixStore(Store):
+     def __init__(self, prefix: str, store: Store) -> None: ...
+     @property
+     def underlying_store(self) -> Store: ...
+
+ class _ControlCollectives:
+     def barrier(self, key: str, timeout: timedelta, blocking: bool) -> None: ...
+     def broadcast_send(self, key: str, data: str, timeout: timedelta) -> None: ...
+     def broadcast_recv(self, key: str, timeout: timedelta) -> str: ...
+     def gather_send(self, key: str, data: str, timeout: timedelta) -> None: ...
+     def gather_recv(self, key: str, timeout: timedelta) -> str: ...
+     def scatter_send(self, key: str, data: str, timeout: timedelta) -> None: ...
+     def scatter_recv(self, key: str, timeout: timedelta) -> str: ...
+     def all_gather(self, key: str, data: str, timeout: timedelta) -> str: ...
+     def all_sum(self, key: str, data: int, timeout: timedelta) -> int: ...
+
+ class _StoreCollectives(_ControlCollectives):
+     def __init__(self, store: Store, rank: int, world_size: int) -> None: ...
+
+ class _DistributedBackendOptions:
+     def __init__(self) -> None: ...
+     @property
+     def store(self) -> Store: ...
+     @store.setter
+     def store(self, store: Store) -> None: ...
+     @property
+     def group_rank(self) -> int: ...
+     @group_rank.setter
+     def group_rank(self, rank: int) -> None: ...
+     @property
+     def group_size(self) -> int: ...
+     @group_size.setter
+     def group_size(self, size: int) -> None: ...
+     @property
+     def timeout(self) -> timedelta: ...
+     @timeout.setter
+     def timeout(self, timeout: timedelta) -> None: ...
+     @property
+     def group_id(self) -> str: ...
+     @group_id.setter
+     def group_id(self, group_id: str) -> None: ...
+     @property
+     def global_ranks_in_group(self) -> list[int]: ...
+     @global_ranks_in_group.setter
+     def global_ranks_in_group(self, ranks: list[int]) -> None: ...
+
+ class Work:
+     def is_completed(self) -> bool: ...
+     def is_success(self) -> bool: ...
+     def exception(self) -> Any: ...
+     def wait(self, timeout: timedelta = ...) -> bool: ...
+     def get_future(self) -> Future: ...
+     def source_rank(self) -> int: ...
+     def _source_rank(self) -> int: ...
+     def result(self) -> list[Tensor]: ...
+     def synchronize(self): ...
+     def boxed(self) -> ScriptObject: ...
+     @staticmethod
+     def unbox(obj: ScriptObject) -> Work: ...
+
+ class Backend:
+     class Options:
+         def __init__(self, backend: str, timeout: timedelta = ...) -> None: ...
+         @property
+         def backend(self) -> str: ...
+         @property
+         def _timeout(self) -> timedelta: ...
+         @_timeout.setter
+         def _timeout(self, val: timedelta) -> None: ...
+
+     def __init__(
+         self,
+         rank: int,
+         size: int,
+     ) -> None: ...
+     @property
+     def supports_splitting(self) -> bool: ...
+     @property
+     def supports_coalescing(self) -> bool: ...
+     @property
+     def supports_time_estimate(self) -> bool: ...
+     @property
+     def options(self) -> Options: ...
+     def rank(self) -> int: ...
+     def size(self) -> int: ...
+     def abort(self) -> None: ...
+     def shutdown(self) -> None: ...
+     def eager_connect_single_device(self, device: torch.device | None) -> None: ...
+     def _set_sequence_number_for_group(self) -> None: ...
+     def _set_default_timeout(self, timeout: timedelta) -> None: ...
+     def get_error(self) -> ErrorType: ...
+     def supports_tensor_alloc(self, device: torch.device) -> bool: ...
+     def allocate_tensor(
+         self,
+         size: int,
+         *,
+         dtype: torch.dtype,
+         device: torch.device,
+     ) -> Tensor: ...
+     @property
+     def mem_allocator(self) -> Any: ...
+
+ class ProcessGroup:
+     class BackendType(Enum):
+         UNDEFINED = ...
+         GLOO = ...
+         NCCL = ...
+         UCC = ...
+         MPI = ...
+         XCCL = ...
+         CUSTOM = ...
+
+     def __init__(
+         self,
+         store: Store,
+         rank: int,
+         size: int,
+     ) -> None: ...
+     def rank(self) -> int: ...
+     def size(self) -> int: ...
+     def abort(self) -> None: ...
+     def shutdown(self) -> None: ...
+     @overload
+     def broadcast(
+         self,
+         tensors: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def broadcast(
+         self,
+         tensor: Tensor,
+         root: int,
+     ) -> Work: ...
+     @overload
+     def allreduce(
+         self,
+         tensors: list[Tensor],
+         opts: AllreduceOptions = ...,
+     ) -> Work: ...
+     @overload
+     def allreduce(
+         self,
+         tensors: list[Tensor],
+         op=...,
+     ) -> Work: ...
+     @overload
+     def allreduce(
+         self,
+         tensor: Tensor,
+         op=...,
+     ) -> Work: ...
+     def allreduce_coalesced(
+         self,
+         tensors: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     def reduce_scatter_tensor_coalesced(
+         self,
+         outputTensors: list[Tensor],
+         inputTensors: list[Tensor],
+         opts: ReduceScatterOptions | None = None,
+     ) -> Work: ...
+     @overload
+     def reduce(
+         self,
+         tensors: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def reduce(
+         self,
+         tensor: Tensor,
+         root: int,
+         op=...,
+     ) -> Work: ...
+     @overload
+     def allgather(
+         self,
+         output_tensors: list[list[Tensor]],
+         input_tensors: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def allgather(
+         self,
+         output_tensors: list[Tensor],
+         input_tensor: Tensor,
+     ) -> Work: ...
+     def _allgather_base(
+         self,
+         output: Tensor,
+         input: Tensor,
+         opts=...,
+     ) -> Work: ...
+     def allgather_coalesced(
+         self,
+         output_lists: list[list[Tensor]],
+         input_list: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     def allgather_into_tensor_coalesced(
+         self,
+         output_lists: list[Tensor],
+         input_list: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def gather(
+         self,
+         output_tensors: list[list[Tensor]],
+         input_tensors: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def gather(
+         self,
+         output_tensors: list[Tensor],
+         input_tensor: Tensor,
+         root: int,
+     ) -> Work: ...
+     @overload
+     def scatter(
+         self,
+         output_tensors: list[Tensor],
+         input_tensors: list[list[Tensor]],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def scatter(
+         self,
+         output_tensor: Tensor,
+         input_tensors: list[Tensor],
+         root: int,
+     ) -> Work: ...
+     @overload
+     def reduce_scatter(
+         self,
+         output_tensors: list[Tensor],
+         input_tensors: list[list[Tensor]],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def reduce_scatter(
+         self,
+         output_tensors: Tensor,
+         input_tensor: list[Tensor],
+     ) -> Work: ...
+     def _reduce_scatter_base(
+         self,
+         outputTensor: Tensor,
+         inputTensor: Tensor,
+         opts: ReduceScatterOptions | None,
+     ) -> Work: ...
+     @overload
+     def alltoall_base(
+         self,
+         output_tensor: Tensor,
+         input_tensor: Tensor,
+         output_split_sizes: list[int],
+         input_split_sizes: list[int],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def alltoall_base(
+         self,
+         output: Tensor,
+         input: Tensor,
+         output_split_sizes: list[int],
+         input_split_sizes: list[int],
+     ) -> Work: ...
+     @overload
+     def alltoall(
+         self,
+         output_tensor: list[Tensor],
+         input_tensor: list[Tensor],
+         opts=...,
+     ) -> Work: ...
+     @overload
+     def alltoall(
+         self,
+         output: list[Tensor],
+         input: list[Tensor],
+     ) -> Work: ...
+     def send(
+         self,
+         tensors: list[Tensor],
+         dstRank: int,
+         tag: int,
+     ) -> Work: ...
+     def recv(
+         self,
+         tensors: list[Tensor],
+         srcRank: int,
+         tag: int,
+     ) -> Work: ...
+     def recv_anysource(self, tensors: list[Tensor], tag: int) -> Work: ...
+     def barrier(self, opts=...) -> Work: ...
+     def boxed(self) -> ScriptObject: ...
+     @staticmethod
+     def unbox(obj: ScriptObject) -> ProcessGroup: ...
+     def _start_coalescing(self, device: torch.device) -> None: ...
+     def _end_coalescing(self, device: torch.device) -> Work: ...
+     def _get_backend_name(self) -> str: ...
+     def _backend_id(self, backend_type: BackendType) -> int: ...
+     @property
+     def _device_types(self) -> list[torch.device]: ...
+     def _get_backend(self, device: torch.device) -> Backend: ...
+     def _set_default_backend(self, backend_type: BackendType) -> None: ...
+     def _register_backend(
+         self,
+         device: torch.device,
+         backend_type: BackendType,
+         backend: Backend | None,
+     ) -> None: ...
+     def _set_group_name(self, name: str) -> None: ...
+     def _set_group_desc(self, desc: str) -> None: ...
+     def name(self) -> str: ...
+     def _has_hooks(self) -> bool: ...
+     def _wait_for_pending_works(self) -> None: ...
+     def _set_sequence_number_for_group(self) -> None: ...
+     @property
+     def bound_device_id(self) -> torch.device | None: ...
+     @bound_device_id.setter
+     def bound_device_id(self, device: torch.device | None) -> None: ...
+     @property
+     def group_name(self) -> str: ...
+     @property
+     def group_desc(self) -> str: ...
+
+ class FakeProcessGroup(Backend):
+     def __init__(self, rank: int, world_size: int) -> None: ...
+
+ class FakeWork(Work):
+     seq_id: int
+     def __init__(self) -> None: ...
+     def wait(self, timeout: timedelta = ...) -> bool: ...
+     def getFuture(self) -> Future: ...
+
+ class ProcessGroupGloo(Backend):
+     class Device: ...
+
+     class Options(Backend.Options):
+         devices: list[ProcessGroupGloo.Device]
+         threads: int
+         global_ranks_in_group: list[int]
+         group_name: str
+
+         def __init__(self): ...
+
+     def __init__(
+         self,
+         store: Store,
+         rank: int,
+         size: int,
+         timeout: timedelta,
+     ) -> None: ...
+     @staticmethod
+     def create_device(hostname="", interface="", lazy_init=None) -> Device: ...
+     @staticmethod
+     def create_default_device(lazy_init=None) -> Device: ...
+     def _set_default_timeout(self, timeout) -> None: ...
+     @property
+     def options(self) -> Options: ...  # type: ignore[override]
+
+ class _ProcessGroupWrapper(Backend):
+     def __init__(self, pg: Backend, gloo_pg: ProcessGroupGloo) -> None: ...
+     wrapped_pg: Backend
+
+ class ErrorType(Enum):
+     SUCCESS = ...
+     TIMEOUT = ...
+     COMM_ERROR = ...
+     REMOTE_ERROR = ...
+
+ class ProcessGroupNCCL(Backend):
+     class NCCLConfig:
+         blocking: int
+         cga_cluster_size: int
+         min_ctas: int
+         max_ctas: int
+
+     class Options(Backend.Options):
+         config: ProcessGroupNCCL.NCCLConfig
+         is_high_priority_stream: bool
+         split_from: ProcessGroupNCCL
+         split_color: int
+         global_ranks_in_group: list[int]
+         group_name: str
+
+         def __init__(self, is_high_priority_stream: bool = False): ...
+
+     def __init__(
+         self,
+         store: Store,
+         rank: int,
+         size: int,
+         options: Options,
+     ) -> None: ...
+     def _group_start(self) -> None: ...
+     def _group_end(self) -> None: ...
+     def _start_time_estimate(self) -> None: ...
+     def _end_time_estimate(self) -> float: ...
+     def _set_default_timeout(self, timeout) -> None: ...
+     def perform_nocolor_split(self, device: torch.device) -> None: ...
+     def register_mem_pool(self, pool: torch.cuda.MemPool) -> None: ...
+     def deregister_mem_pool(self, pool: torch.cuda.MemPool) -> None: ...
+     def comm_split_count(self) -> int: ...
+     def _add_ephemeral_timeout(self, timeout: timedelta) -> None: ...
+     def abort(self) -> None: ...
+     def _is_initialized(self) -> bool: ...
+     @property
+     def uid(self) -> int: ...
+     @property
+     def options(self) -> Options: ...  # type: ignore[override]
+     @staticmethod
+     def get_build_nccl_version(self) -> tuple[int, int, int]: ...
+     @staticmethod
+     def get_runtime_nccl_version(self) -> tuple[int, int, int]: ...
+
+ class ProcessGroupUCC(Backend):
+     def __init__(
+         self,
+         store: Store,
+         rank: int,
+         size: int,
+         timeout: timedelta,
+     ) -> None: ...
+
+ class ProcessGroupMPI(Backend):
+     def __init__(
+         self,
+         rank: int,
+         size: int,
+         pgComm: int,
+     ) -> None: ...
+     @staticmethod
+     def create(ranks: list[int]) -> ProcessGroupMPI: ...
+
+ def _compute_bucket_assignment_by_size(
+     tensors: list[Tensor],
+     bucket_size_limits: list[int],
+     expect_sparse_gradient: list[bool] = ...,
+     tensor_indices: list[int] = ...,
+ ) -> tuple[list[list[int]], list[int]]: ...
+ def _broadcast_coalesced(
+     process_group: ProcessGroup,
+     tensors: list[Tensor],
+     buffer_size: int,
+     src: int,
+ ): ...
+ def _test_python_store(store: Store): ...
+ def _verify_params_across_processes(
+     process_group: ProcessGroup,
+     params: list[Tensor],
+     logger: Logger | None,
+ ): ...
+ def _make_nccl_premul_sum(factor: float | list[Tensor]) -> ReduceOp: ...
+ def _register_process_group(
+     group_name: str,
+     process_group: ProcessGroup,
+ ) -> None: ...
+ def _resolve_process_group(group_name: str) -> ProcessGroup: ...
+ def _register_work(tensor: torch.Tensor, work: Work) -> ProcessGroup: ...
+ def _get_work_registry_size() -> int: ...
+ def _set_allow_inflight_collective_as_graph_input(
+     value: bool,
+ ) -> None: ...
+ def _allow_inflight_collective_as_graph_input() -> bool: ...
+ def _unregister_all_process_groups() -> None: ...
+ def _unregister_process_group(group_name: str) -> None: ...
+
+ # Initializes the device state in CUmodule so that it's able to perform NVSHMEM
+ # operations. CUmodule is a pointer to a CUDA module, carried by an int64 in
+ # Python. At the C++ interface, it is converted to a uintptr_t.
+ def _nvshmemx_cumodule_init(module: int) -> None: ...
+
+ # Check if NVSHMEM is available on the current system.
+ def _is_nvshmem_available() -> bool: ...
+
+ class _SymmetricMemory:
+     @staticmethod
+     def set_group_info(
+         group_name: str,
+         rank: int,
+         world_size: int,
+         store: Store,
+     ) -> None: ...
+     @staticmethod
+     def empty_strided_p2p(
+         size: torch.types._size,
+         stride: torch.types._size,
+         dtype: torch.dtype,
+         device: torch.device,
+         group_name: str | None = None,
+         alloc_id: int | None = None,
+     ) -> torch.Tensor: ...
+     @staticmethod
+     def has_multicast_support(
+         device_type: DeviceType,
+         device_idx: int,
+     ) -> bool: ...
+     @property
+     def rank(self) -> int: ...
+     @property
+     def world_size(self) -> int: ...
+     @staticmethod
+     def rendezvous(
+         tensor: torch.Tensor, group_name: str | None = None
+     ) -> _SymmetricMemory: ...
+     def get_buffer(
+         self,
+         rank: int,
+         sizes: torch.types._size,
+         dtype: torch.dtype,
+         storage_offset: int | None = 0,
+     ) -> torch.Tensor: ...
+     def get_signal_pad(
+         self,
+         rank: int,
+         sizes: torch.types._size = [],
+         dtype: torch.dtype | None = None,
+         storage_offset: int | None = 0,
+     ) -> torch.Tensor: ...
+     def barrier(self, channel: int = 0, timeout_ms: int = 0) -> None: ...
+     def put_signal(
+         self,
+         dst_rank: int,
+         channel: int = 0,
+         timeout_ms: int = 0,
+     ) -> None: ...
+     def wait_signal(
+         self,
+         src_rank: int,
+         channel: int = 0,
+         timeout_ms: int = 0,
+     ) -> None: ...
+     @staticmethod
+     def memset32(
+         tensor: torch.Tensor, offset: int, val: int, count: int = 1
+     ) -> torch.Tensor: ...
+     @staticmethod
+     def stream_write_value32(
+         tensor: torch.Tensor, offset: int, val: int
+     ) -> torch.Tensor: ...
+     @property
+     def buffer_ptrs(self) -> list[int]: ...
+     @property
+     def buffer_ptrs_dev(self) -> int: ...
+     @property
+     def signal_pad_ptrs(self) -> list[int]: ...
+     @property
+     def signal_pad_ptrs_dev(self) -> int: ...
+     @property
+     def multicast_ptr(self) -> int: ...
+     @property
+     def buffer_size(self) -> int: ...
+     @property
+     def signal_pad_size(self) -> int: ...
+
+ class ProcessGroupXCCL(Backend):
+     def __init__(
+         self,
+         store: Store,
+         rank: int,
+         size: int,
+     ): ...
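A minimal sketch of the Store rendezvous API declared above, runnable in a single process; distributed jobs normally construct one of these indirectly through torch.distributed.init_process_group:

from datetime import timedelta
from torch._C._distributed_c10d import TCPStore

# Single-process sketch: this process acts as the master (world_size=1);
# the host/port values are hypothetical.
store = TCPStore("127.0.0.1", 29500, 1, True, timeout=timedelta(seconds=30))
store.set("status", "ready")
print(store.get("status"))   # b'ready' -- get() returns bytes per the stub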
phivenv/Lib/site-packages/torch/_C/_distributed_rpc.pyi ADDED
@@ -0,0 +1,188 @@
+ # mypy: allow-untyped-defs
+ # mypy: disable-error-code="type-arg"
+ from datetime import timedelta
+ from typing import Any, Generic, overload, TypeVar
+
+ import torch
+ from torch._C import Future
+ from torch._C._autograd import ProfilerEvent
+ from torch._C._distributed_c10d import Store
+ from torch._C._profiler import ProfilerConfig
+
+ # This module is defined in torch/csrc/distributed/rpc/init.cpp
+
+ _DEFAULT_INIT_METHOD: str
+ _DEFAULT_NUM_WORKER_THREADS: int
+ _UNSET_RPC_TIMEOUT: float
+ _DEFAULT_RPC_TIMEOUT_SEC: float
+
+ _T = TypeVar("_T")
+
+ class RpcBackendOptions:
+     rpc_timeout: float
+     init_method: str
+     def __init__(
+         self,
+         rpc_timeout: float = ...,
+         init_method: str = ...,
+     ) -> None: ...
+
+ class WorkerInfo:
+     def __init__(self, name: str, worker_id: int) -> None: ...
+     @property
+     def name(self) -> str: ...
+     @property
+     def id(self) -> int: ...
+     def __eq__(self, other: object) -> bool: ...
+
+ class RpcAgent:
+     def join(self, shutdown: bool = False, timeout: float = 0): ...
+     def sync(self): ...
+     def shutdown(self): ...
+     @overload
+     def get_worker_info(self) -> WorkerInfo: ...
+     @overload
+     def get_worker_info(self, workerName: str) -> WorkerInfo: ...
+     def get_worker_infos(self) -> list[WorkerInfo]: ...
+     def _get_device_map(self, dst: WorkerInfo) -> dict[torch.device, torch.device]: ...
+     def get_debug_info(self) -> dict[str, str]: ...
+     def get_metrics(self) -> dict[str, str]: ...
+
+ class PyRRef(Generic[_T]):
+     def __init__(self, value: _T, type_hint: Any = None) -> None: ...
+     def is_owner(self) -> bool: ...
+     def confirmed_by_owner(self) -> bool: ...
+     def owner(self) -> WorkerInfo: ...
+     def owner_name(self) -> str: ...
+     def to_here(self, timeout: float = ...) -> _T: ...
+     def local_value(self) -> Any: ...
+     def rpc_sync(self, timeout: float = ...) -> Any: ...
+     def rpc_async(self, timeout: float = ...) -> Any: ...
+     def remote(self, timeout: float = ...) -> Any: ...
+     def _serialize(self) -> tuple: ...
+     @staticmethod
+     def _deserialize(tp: tuple) -> PyRRef: ...
+     def _get_type(self) -> type[_T]: ...
+     def _get_future(self) -> Future[_T]: ...
+     def _get_profiling_future(self) -> Future[_T]: ...
+     def _set_profiling_future(self, profilingFuture: Future[_T]): ...
+
+ class _TensorPipeRpcBackendOptionsBase(RpcBackendOptions):
+     num_worker_threads: int
+     device_maps: dict[str, dict[torch.device, torch.device]]
+     devices: list[torch.device]
+     def __init__(
+         self,
+         num_worker_threads: int,
+         _transports: list | None,
+         _channels: list | None,
+         rpc_timeout: float = ...,
+         init_method: str = ...,
+         device_maps: dict[str, dict[torch.device, torch.device]] = {},  # noqa: B006
+         devices: list[torch.device] = [],  # noqa: B006
+     ) -> None: ...
+     def _set_device_map(
+         self,
+         to: str,
+         device_map: dict[torch.device, torch.device],
+     ): ...
+
+ class TensorPipeAgent(RpcAgent):
+     def __init__(
+         self,
+         store: Store,
+         name: str,
+         worker_id: int,
+         world_size: int | None,
+         opts: _TensorPipeRpcBackendOptionsBase,
+         reverse_device_maps: dict[str, dict[torch.device, torch.device]],
+         devices: list[torch.device],
+     ) -> None: ...
+     def join(self, shutdown: bool = False, timeout: float = 0): ...
+     def shutdown(self): ...
+     @overload
+     def get_worker_info(self) -> WorkerInfo: ...
+     @overload
+     def get_worker_info(self, workerName: str) -> WorkerInfo: ...
+     @overload
+     def get_worker_info(self, id: int) -> WorkerInfo: ...
+     def get_worker_infos(self) -> list[WorkerInfo]: ...
+     def _get_device_map(self, dst: WorkerInfo) -> dict[torch.device, torch.device]: ...
+     def _update_group_membership(
+         self,
+         worker_info: WorkerInfo,
+         my_devices: list[torch.device],
+         reverse_device_map: dict[str, dict[torch.device, torch.device]],
+         is_join: bool,
+     ): ...
+     def _get_backend_options(self) -> _TensorPipeRpcBackendOptionsBase: ...
+     @property
+     def is_static_group(self) -> bool: ...
+     @property
+     def store(self) -> Store: ...
+
+ def _is_current_rpc_agent_set() -> bool: ...
+ def _get_current_rpc_agent() -> RpcAgent: ...
+ def _set_and_start_rpc_agent(agent: RpcAgent): ...
+ def _reset_current_rpc_agent(): ...
+ def _delete_all_user_and_unforked_owner_rrefs(timeout: timedelta = ...): ...
+ def _destroy_rref_context(ignoreRRefLeak: bool): ...
+ def _rref_context_get_debug_info() -> dict[str, str]: ...
+ def _cleanup_python_rpc_handler(): ...
+ def _invoke_rpc_builtin(
+     dst: WorkerInfo,
+     opName: str,
+     rpcTimeoutSeconds: float,
+     *args: Any,
+     **kwargs: Any,
+ ): ...
+ def _invoke_rpc_python_udf(
+     dst: WorkerInfo,
+     pickledPythonUDF: str,
+     tensors: list[torch.Tensor],
+     rpcTimeoutSeconds: float,
+     isAsyncExecution: bool,
+ ): ...
+ def _invoke_rpc_torchscript(
+     dstWorkerName: str,
+     qualifiedNameStr: str,
+     argsTuple: tuple,
+     kwargsDict: dict,
+     rpcTimeoutSeconds: float,
+     isAsyncExecution: bool,
+ ): ...
+ def _invoke_remote_builtin(
+     dst: WorkerInfo,
+     opName: str,
+     rpcTimeoutSeconds: float,
+     *args: Any,
+     **kwargs: Any,
+ ): ...
+ def _invoke_remote_python_udf(
+     dst: WorkerInfo,
+     pickledPythonUDF: str,
+     tensors: list[torch.Tensor],
+     rpcTimeoutSeconds: float,
+     isAsyncExecution: bool,
+ ): ...
+ def _invoke_remote_torchscript(
+     dstWorkerName: WorkerInfo,
+     qualifiedNameStr: str,
+     rpcTimeoutSeconds: float,
+     isAsyncExecution: bool,
+     *args: Any,
+     **kwargs: Any,
+ ): ...
+ def get_rpc_timeout() -> float: ...
+ def enable_gil_profiling(flag: bool): ...
+ def _set_rpc_timeout(rpcTimeoutSeconds: float): ...
+
+ class RemoteProfilerManager:
+     @staticmethod
+     def set_current_profiling_key(key: str): ...
+
+ def _enable_server_process_global_profiler(new_config: ProfilerConfig): ...
+ def _disable_server_process_global_profiler() -> list[list[list[ProfilerEvent]]]: ...
+ def _set_profiler_node_id(default_node_id: int): ...
+ def _enable_jit_rref_pickle(): ...
+ def _disable_jit_rref_pickle(): ...
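User code reaches these agent bindings through the public torch.distributed.rpc package; a minimal single-worker sketch, assuming MASTER_ADDR and MASTER_PORT are set in the environment:

import torch.distributed.rpc as rpc

rpc.init_rpc("worker0", rank=0, world_size=1)  # creates and starts a TensorPipeAgent
info = rpc.get_worker_info()                   # the WorkerInfo type declared above
print(info.name, info.id)
rpc.shutdown()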
phivenv/Lib/site-packages/torch/_C/_distributed_rpc_testing.pyi ADDED
@@ -0,0 +1,32 @@
+ import torch
+ from torch._C._distributed_c10d import Store
+ from torch._C._distributed_rpc import _TensorPipeRpcBackendOptionsBase, TensorPipeAgent
+
+ # This module is defined in torch/csrc/distributed/rpc/testing/init.cpp
+
+ class FaultyTensorPipeRpcBackendOptions(_TensorPipeRpcBackendOptionsBase):
+     def __init__(
+         self,
+         num_worker_threads: int,
+         rpc_timeout: float,
+         init_method: str,
+         messages_to_fail: list[str],
+         messages_to_delay: dict[str, float],
+         num_fail_sends: int,
+     ) -> None: ...
+     num_send_recv_threads: int
+     messages_to_fail: list[str]
+     messages_to_delay: dict[str, float]
+     num_fail_sends: int
+
+ class FaultyTensorPipeAgent(TensorPipeAgent):
+     def __init__(
+         self,
+         store: Store,
+         name: str,
+         rank: int,
+         world_size: int,
+         options: FaultyTensorPipeRpcBackendOptions,
+         reverse_device_maps: dict[str, dict[torch.device, torch.device]],
+         devices: list[torch.device],
+     ) -> None: ...
phivenv/Lib/site-packages/torch/_C/_dynamo/__init__.pyi ADDED
@@ -0,0 +1,4 @@
+ from . import compiled_autograd, eval_frame, guards  # noqa: F401
+
+ def strip_function_call(name: str) -> str: ...
+ def is_valid_var_name(name: str) -> bool | int: ...
phivenv/Lib/site-packages/torch/_C/_dynamo/compiled_autograd.pyi ADDED
@@ -0,0 +1,13 @@
+ from typing import Callable
+
+ from torch import Tensor
+ from torch._dynamo.compiled_autograd import AutogradCompilerInstance
+
+ def set_autograd_compiler(
+     autograd_compiler: Callable[[], AutogradCompilerInstance] | None,
+     dynamic: bool,
+ ) -> tuple[Callable[[], AutogradCompilerInstance] | None, bool]: ...
+ def clear_cache() -> None: ...
+ def is_cache_empty() -> bool: ...
+ def set_verbose_logger(fn: Callable[[str], None] | None) -> bool: ...
+ def call_cpp_tensor_pre_hooks(idx: int, grad: Tensor) -> Tensor: ...
phivenv/Lib/site-packages/torch/_C/_dynamo/eval_frame.pyi ADDED
@@ -0,0 +1,71 @@
+ import enum
+ import types
+ from typing import Optional, overload
+
+ from torch._dynamo.types import (
+     DynamoCallback,
+     DynamoGuardCompleteHook,
+     DynamoGuardHook,
+     GuardFn,
+ )
+
+ def set_eval_frame(callback: DynamoCallback) -> DynamoCallback: ...
+ def set_skip_guard_eval_unsafe(value: bool) -> bool: ...
+ def get_eval_frame_callback() -> DynamoCallback: ...
+ def reset_code(code: types.CodeType) -> None: ...
+ def unsupported(obj1: object, obj2: object) -> object: ...
+ def set_code_exec_strategy(
+     code: types.CodeType, strategy: _FrameExecStrategy
+ ) -> None: ...
+ def set_guard_error_hook(hook: DynamoGuardHook) -> None: ...
+ def set_guard_complete_hook(
+     hook: Optional[DynamoGuardCompleteHook],
+ ) -> Optional[DynamoGuardCompleteHook]: ...
+ def raise_sigtrap() -> None: ...
+
+ class _CacheEntry:
+     def check_fn(self, *args: object, **kwargs: object) -> bool: ...
+     code: types.CodeType
+     next: _CacheEntry | None
+
+ class _ExtraState:
+     def invalidate(self, cache_entry: _CacheEntry, guard_manager: object) -> None: ...
+
+ class _FrameAction(enum.IntEnum):
+     DEFAULT = 0
+     SKIP = 1
+     RUN_ONLY = 2
+
+ class _FrameExecStrategy:
+     cur_action: _FrameAction
+     recursive_action: _FrameAction
+
+     @overload
+     def __init__(self) -> None: ...
+     @overload
+     def __init__(
+         self, cur_action: _FrameAction, recursive_action: _FrameAction
+     ) -> None: ...
+
+ # This is an object that encapsulates the Python FrameType, and exposes
+ # properties Dynamo cares about for a frame.
+ class _PyInterpreterFrame:
+     f_code: types.CodeType
+     f_locals: dict[str, object]
+     f_globals: dict[str, object]
+     f_builtins: dict[str, object]
+     f_lasti: int
+     f_lineno: int
+     f_back: types.FrameType
+     # A tuple containing cell objects captured by this frame.
+     closure: tuple[types.CellType]
+
+ def _debug_get_cache_entry_list(code: types.CodeType) -> list[_CacheEntry]: ...
+
+ py_opcode_caches: list[int]
+
+ def code_framelocals_names(code: types.CodeType) -> tuple[str]: ...
+ def _load_precompile_entry(
+     code: types.CodeType, guard_manager: GuardFn, dynamo_code: types.CodeType
+ ) -> None: ...
+ def _reset_precompile_entries(code: types.CodeType) -> None: ...
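A minimal sketch of the cache-inspection helper declared above, which walks the _CacheEntry list Dynamo attaches to a compiled function's original code object:

import torch
from torch._C._dynamo.eval_frame import _debug_get_cache_entry_list

def f(x):
    return x + 1

torch.compile(f)(torch.randn(3))        # trigger one compilation of f
for entry in _debug_get_cache_entry_list(f.__code__):
    print(entry.code)                   # the transformed code object for this entry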
phivenv/Lib/site-packages/torch/_C/_dynamo/guards.pyi ADDED
@@ -0,0 +1,191 @@
+ # mypy: allow-untyped-defs
+ from typing import Any, Callable
+
+ import torch
+
+ class GlobalStateGuard:
+     def check(self) -> bool: ...
+     def reason(self) -> str: ...
+
+ class LeafGuard: ...
+ class GuardDebugInfo: ...
+
+ class GuardManager:
+     def check(self, value) -> bool: ...
+     def check_verbose(self, value) -> GuardDebugInfo: ...
+
+     # Accessors
+     def globals_dict_manager(
+         self,
+         f_globals: dict[str, Any],
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def framelocals_manager(
+         self,
+         key: tuple[str, int],
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def dict_getitem_manager(
+         self,
+         key,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def global_weakref_manager(
+         self,
+         global_name: str,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def type_manager(
+         self,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def getattr_manager(
+         self,
+         attr: str,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def tensor_property_size_manager(
+         self,
+         idx: int,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def tensor_property_shape_manager(
+         self,
+         idx: int,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def tensor_property_storage_offset_manager(
+         self,
+         idx: None,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def indexed_manager(
+         self,
+         idx: int,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def lambda_manager(
+         self,
+         python_lambda,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+
+     # Leaf guards
+     def add_lambda_guard(self, user_lambda, verbose_code_parts: list[str]) -> None: ...
+     def add_id_match_guard(self, id_val, verbose_code_parts: list[str]) -> None: ...
+     def add_equals_match_guard(
+         self,
+         equals_val,
+         verbose_code_parts: list[str],
+     ) -> None: ...
+     def add_global_state_guard(self, verbose_code_parts: list[str]) -> None: ...
+     def add_torch_function_mode_stack_guard(
+         self, initial_stack, verbose_code_parts: list[str]
+     ) -> None: ...
+     def add_mapping_keys_guard(self, value, verbose_code_parts: list[str]) -> None: ...
+
+ class RootGuardManager(GuardManager):
+     def get_epilogue_lambda_guards(self) -> list[LeafGuard]: ...
+     def add_epilogue_lambda_guard(
+         self,
+         guard: LeafGuard,
+         verbose_code_parts: list[str],
+     ) -> None: ...
+     def clone_manager(
+         self, clone_filter_fn: Callable[[GuardManager], bool]
+     ) -> RootGuardManager: ...
+
+ class DictGuardManager(GuardManager):
+     def get_key_manager(
+         self,
+         index,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+     def get_value_manager(
+         self,
+         index,
+         source,
+         example_value,
+         guard_manager_enum,
+     ) -> GuardManager: ...
+
+ def install_object_aliasing_guard(
+     guard_managers: list[GuardManager],
+     tensor_names: list[str],
+     verbose_code_parts: list[str],
+ ): ...
+ def install_no_tensor_aliasing_guard(
+     guard_managers: list[GuardManager],
+     tensor_names: list[str],
+     verbose_code_parts: list[str],
+ ): ...
+ def install_storage_overlapping_guard(
+     overlapping_guard_managers: list[GuardManager],
+     non_overlapping_guard_managers: list[GuardManager],
+     verbose_code_parts: list[str],
+ ): ...
+ def install_symbolic_shape_guard(
+     guard_managers: list[GuardManager],
+     nargs_int: int,
+     nargs_float: int,
+     py_addr: int,
+     py_addr_keep_alive: Any,
+     verbose_code_parts: list[str],
+ ): ...
+ def profile_guard_manager(
+     guard_manager: GuardManager,
+     f_locals: dict[str, Any],
+     n_iters: int,
+ ) -> float: ...
+
+ class TensorGuards:
+     def __init__(
+         self,
+         *,
+         dynamic_dims_sizes: list[torch.SymInt | None] | None = None,
+         dynamic_dims_strides: list[torch.SymInt | None] | None = None,
+     ) -> None: ...
+     def check(self, *args) -> bool: ...
+     def check_verbose(self, *args, tensor_check_names=None) -> bool | str: ...
+
+ def assert_size_stride(
+     item: torch.Tensor,
+     size: torch.types._size,
+     stride: torch.types._size,
+     op_name: str | None = None,
+ ): ...
+ def assert_alignment(
+     item: torch.Tensor,
+     alignment: int,
+     op_name: str | None = None,
+ ): ...
+ def check_obj_id(obj: object, expected: int) -> bool: ...
+ def check_type_id(obj: object, expected: int) -> bool: ...
+ def dict_version(d: dict[Any, Any]) -> int: ...
+ def compute_overlapping_tensors(
+     tensors: list[torch.Tensor], symbolic: bool = True
+ ) -> set[int]: ...
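The standalone checkers at the bottom of this stub can be exercised directly; a minimal sketch, with semantics as assumed from the signatures above:

    import torch
    from torch._C._dynamo.guards import assert_size_stride, check_type_id

    t = torch.empty(2, 3)
    assert_size_stride(t, (2, 3), (3, 1))      # passes for a contiguous 2x3 tensor
    print(check_type_id(t, id(torch.Tensor)))  # fast `type(t) is torch.Tensor` check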
phivenv/Lib/site-packages/torch/_C/_export/__init__.pyi ADDED
@@ -0,0 +1,9 @@
+ # Defined in torch/csrc/export/pybind.cpp
+ class CppExportedProgram: ...
+
+ def deserialize_exported_program(
+     serialized_program: str,
+ ) -> CppExportedProgram: ...
+ def serialize_exported_program(
+     cpp_exported_program: CppExportedProgram,
+ ) -> str: ...
phivenv/Lib/site-packages/torch/_C/_export/pt2_archive_constants.pyi ADDED
@@ -0,0 +1,22 @@
+ # Defined in torch/csrc/export/pt2_archive_constants.h
+
+ ARCHIVE_ROOT_NAME: str = ...
+ ARCHIVE_FORMAT_PATH: str = ...
+ ARCHIVE_FORMAT_VALUE: str = ...
+ ARCHIVE_VERSION_PATH: str = ...
+ ARCHIVE_VERSION_VALUE: str = ...
+ MODELS_DIR: str = ...
+ MODELS_FILENAME_FORMAT: str = ...
+ AOTINDUCTOR_DIR: str = ...
+ MTIA_DIR: str = ...
+ WEIGHTS_DIR: str = ...
+ WEIGHT_FILENAME_PREFIX: str = ...
+ CONSTANTS_DIR: str = ...
+ TENSOR_CONSTANT_FILENAME_PREFIX: str = ...
+ CUSTOM_OBJ_FILENAME_PREFIX: str = ...
+ SAMPLE_INPUTS_DIR: str = ...
+ SAMPLE_INPUTS_FILENAME_FORMAT: str = ...
+ EXTRA_DIR: str = ...
+ MODULE_INFO_PATH: str = ...
+ XL_MODEL_WEIGHTS_DIR: str = ...
+ XL_MODEL_WEIGHTS_PARAM_CONFIG_PATH: str = ...
phivenv/Lib/site-packages/torch/_C/_functions.pyi ADDED
@@ -0,0 +1,19 @@
+ from typing import AnyStr, overload
+
+ from torch import Tensor
+
+ class UndefinedGrad:
+     def __init__(self) -> None: ...
+     def __call__(self, *inputs: Tensor) -> list[Tensor]: ...
+
+ class DelayedError:
+     def __init__(self, msg: AnyStr, num_inputs: int) -> None: ...
+
+     # __call__ should really be a higher-kinded type:
+     #   def __call__(self, arg: Tensor) -> Tensor: ...
+     #   def __call__(self, *args: Tensor * num_inputs) -> Tuple[Tensor * num_inputs]: ...
+
+     @overload
+     def __call__(self, i0: Tensor) -> Tensor: ...
+     @overload
+     def __call__(self, *args: Tensor) -> tuple[Tensor, ...]: ...
phivenv/Lib/site-packages/torch/_C/_functorch.pyi ADDED
@@ -0,0 +1,86 @@
+ # mypy: allow-untyped-defs
+ from enum import Enum
+
+ from torch import Tensor
+
+ # Defined in torch/csrc/functorch/init.cpp
+
+ def _set_dynamic_layer_keys_included(included: bool) -> None: ...
+ def get_unwrapped(tensor: Tensor) -> Tensor: ...
+ def is_batchedtensor(tensor: Tensor) -> bool: ...
+ def is_functionaltensor(tensor: Tensor) -> bool: ...
+ def is_functorch_wrapped_tensor(tensor: Tensor) -> bool: ...
+ def is_gradtrackingtensor(tensor: Tensor) -> bool: ...
+ def is_legacy_batchedtensor(tensor: Tensor) -> bool: ...
+ def maybe_get_bdim(tensor: Tensor) -> int: ...
+ def maybe_get_level(tensor: Tensor) -> int: ...
+ def maybe_current_level() -> int | None: ...
+ def unwrap_if_dead(tensor: Tensor) -> Tensor: ...
+ def _unwrap_for_grad(tensor: Tensor, level: int) -> Tensor: ...
+ def _wrap_for_grad(tensor: Tensor, level: int) -> Tensor: ...
+ def _unwrap_batched(tensor: Tensor, level: int) -> tuple[Tensor, int | None]: ...
+ def current_level() -> int: ...
+ def count_jvp_interpreters() -> int: ...
+ def _add_batch_dim(tensor: Tensor, bdim: int, level: int) -> Tensor: ...
+ def set_single_level_autograd_function_allowed(allowed: bool) -> None: ...
+ def get_single_level_autograd_function_allowed() -> bool: ...
+ def _unwrap_functional_tensor(tensor: Tensor, reapply_views: bool) -> Tensor: ...
+ def _wrap_functional_tensor(tensor: Tensor, level: int) -> Tensor: ...
+ def _vmap_increment_nesting(batch_size: int, randomness: str) -> int: ...
+ def _vmap_decrement_nesting() -> int: ...
+ def _grad_increment_nesting() -> int: ...
+ def _grad_decrement_nesting() -> int: ...
+ def _jvp_increment_nesting() -> int: ...
+ def _jvp_decrement_nesting() -> int: ...
+
+ # Defined in aten/src/ATen/functorch/Interpreter.h
+ class TransformType(Enum):
+     Torch = ...
+     Vmap = ...
+     Grad = ...
+     Jvp = ...
+     Functionalize = ...
+
+ class RandomnessType(Enum):
+     Error = ...
+     Same = ...
+     Different = ...
+
+ class CInterpreter:
+     def key(self) -> TransformType: ...
+     def level(self) -> int: ...
+     def serialize(self) -> bytes: ...
+     @staticmethod
+     def deserialize(data: bytes) -> CInterpreter: ...
+
+ class CGradInterpreterPtr:
+     def __init__(self, interpreter: CInterpreter) -> None: ...
+     def lift(self, tensor: Tensor) -> Tensor: ...
+     def prevGradMode(self) -> bool: ...
+
+ class CJvpInterpreterPtr:
+     def __init__(self, interpreter: CInterpreter) -> None: ...
+     def lift(self, tensor: Tensor) -> Tensor: ...
+     def prevFwdGradMode(self) -> bool: ...
+
+ class CFunctionalizeInterpreterPtr:
+     def __init__(self, interpreter: CInterpreter) -> None: ...
+     def key(self) -> TransformType: ...
+     def level(self) -> int: ...
+     def functionalizeAddBackViews(self) -> bool: ...
+
+ class CVmapInterpreterPtr:
+     def __init__(self, interpreter: CInterpreter) -> None: ...
+     def key(self) -> TransformType: ...
+     def level(self) -> int: ...
+     def batchSize(self) -> int: ...
+     def randomness(self) -> RandomnessType: ...
+
+ class DynamicLayer: ...
+
+ def get_dynamic_layer_stack_depth() -> int: ...
+ def get_interpreter_stack() -> list[CInterpreter]: ...
+ def peek_interpreter_stack() -> CInterpreter: ...
+ def pop_dynamic_layer_stack() -> DynamicLayer: ...
+ def pop_dynamic_layer_stack_and_undo_to_depth(depth: int) -> None: ...
+ def push_dynamic_layer_stack(dl: DynamicLayer) -> int: ...
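These bindings back the torch.func/vmap machinery; a sketch showing the wrapper predicates observed from inside a vmap'd function (`probe` is a made-up example):

    import torch
    from torch._C._functorch import is_batchedtensor, maybe_get_bdim

    def probe(x):
        # Inside vmap, x arrives wrapped as a BatchedTensor at the current level.
        print(is_batchedtensor(x), maybe_get_bdim(x))
        return x * 2

    torch.vmap(probe)(torch.randn(3, 4))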
phivenv/Lib/site-packages/torch/_C/_instruction_counter.pyi ADDED
@@ -0,0 +1,4 @@
+ # Defined in torch/csrc/instruction_counter/Module.cpp
+
+ def start() -> int: ...
+ def end(id: int) -> int: ...
phivenv/Lib/site-packages/torch/_C/_itt.pyi ADDED
@@ -0,0 +1,5 @@
+ # Defined in torch/csrc/itt.cpp
+ def is_available() -> None: ...
+ def rangePush(message: str) -> None: ...
+ def rangePop() -> None: ...
+ def mark(message: str) -> None: ...
phivenv/Lib/site-packages/torch/_C/_lazy.pyi ADDED
@@ -0,0 +1,26 @@
+ from torch import Tensor
+
+ # defined in torch/csrc/lazy/python/init.cpp
+ def _mark_step(device: str, devices: list[str], wait: bool) -> None: ...
+ def _wait_device_ops(devices: list[str]) -> None: ...
+ def _reset_metrics() -> None: ...
+ def _counter_names() -> list[str]: ...
+ def _counter_value(name: str) -> int: ...
+ def _metrics_report() -> str: ...
+ def _get_graph_hash(tensors: list[Tensor]) -> str: ...
+ def _sync_multi(
+     tensors: list[Tensor],
+     devices: list[str],
+     wait: bool = True,
+     sync_ltc_data: bool = True,
+ ) -> None: ...
+ def _get_tensor_id(tensor: Tensor) -> int: ...
+ def _get_tensors_text(tensors: list[Tensor]) -> str: ...
+ def _get_tensors_dot(tensors: list[Tensor]) -> str: ...
+ def _get_tensors_backend(tensors: list[Tensor]) -> str: ...
+ def _get_force_fallback() -> str: ...
+ def _set_force_fallback(newval: str) -> None: ...
+ def _clear_ir_cache() -> None: ...
+ def _dump_ir_cache(filename: str) -> None: ...
+ def _set_reuse_ir(val: bool) -> None: ...
+ def _get_default_device_type() -> str: ...
phivenv/Lib/site-packages/torch/_C/_lazy_ts_backend.pyi ADDED
@@ -0,0 +1,12 @@
+ # mypy: allow-untyped-defs
+ # defined in torch/csrc/lazy/python/init.cpp
+
+ from typing import Any
+
+ from torch import Tensor
+
+ def _init(): ...
+ def _get_tensors_ts_device_data_node(
+     tensors: list[Tensor],
+ ) -> tuple[list[int], list[Any]]: ...
+ def _run_cached_graph(hash_str: str, graph_inputs: list[Any]) -> list[Tensor]: ...
phivenv/Lib/site-packages/torch/_C/_monitor.pyi ADDED
@@ -0,0 +1,58 @@
+ # Defined in torch/csrc/monitor/python_init.cpp
+
+ import datetime
+ from enum import Enum
+ from types import TracebackType
+ from typing import Callable
+
+ class Aggregation(Enum):
+     VALUE = ...
+     MEAN = ...
+     COUNT = ...
+     SUM = ...
+     MAX = ...
+     MIN = ...
+
+ class Stat:
+     name: str
+     count: int
+     def __init__(
+         self,
+         name: str,
+         aggregations: list[Aggregation],
+         window_size: int,
+         max_samples: int = -1,
+     ) -> None: ...
+     def add(self, v: float) -> None: ...
+     def get(self) -> dict[Aggregation, float]: ...
+
+ class Event:
+     name: str
+     timestamp: datetime.datetime
+     data: dict[str, int | float | bool | str]
+     def __init__(
+         self,
+         name: str,
+         timestamp: datetime.datetime,
+         data: dict[str, int | float | bool | str],
+     ) -> None: ...
+
+ def log_event(e: Event) -> None: ...
+
+ class EventHandlerHandle: ...
+
+ def register_event_handler(handler: Callable[[Event], None]) -> EventHandlerHandle: ...
+ def unregister_event_handler(handle: EventHandlerHandle) -> None: ...
+
+ class _WaitCounterTracker:
+     def __enter__(self) -> None: ...
+     def __exit__(
+         self,
+         exc_type: type[BaseException] | None = None,
+         exc_value: BaseException | None = None,
+         traceback: TracebackType | None = None,
+     ) -> None: ...
+
+ class _WaitCounter:
+     def __init__(self, key: str) -> None: ...
+     def guard(self) -> _WaitCounterTracker: ...
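A minimal usage sketch for the Stat API above, via its public torch.monitor surface (the metric name is made up; aggregates are reported per completed window):

    from torch.monitor import Aggregation, Stat

    # Aggregate raw samples over fixed-size windows of 3 samples each.
    latency = Stat("request_latency_ms", [Aggregation.MEAN, Aggregation.MAX], 3)
    for sample in (10.0, 20.0, 30.0):
        latency.add(sample)
    print(latency.get())  # aggregates for the most recently completed window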
phivenv/Lib/site-packages/torch/_C/_nn.pyi ADDED
@@ -0,0 +1,175 @@
+ # @generated by tools/pyi/gen_pyi.py from torch/_C/_nn.pyi.in
+ # mypy: disable-error-code="type-arg"
+
+ from collections.abc import Sequence
+ from typing import Literal, overload
+
+ from torch import memory_format, Tensor
+ from torch.types import _bool, _device, _dtype, _int, _size
+
+ # Defined in tools/autograd/templates/python_nn_functions.cpp
+
+ def adaptive_avg_pool2d(input: Tensor, output_size: _int | _size) -> Tensor: ...
+ def adaptive_avg_pool3d(input: Tensor, output_size: _int | _size) -> Tensor: ...
+ def adaptive_max_pool2d(
+     input: Tensor,
+     output_size: _int | _size,
+ ) -> tuple[Tensor, Tensor]: ...
+ def adaptive_max_pool3d(
+     input: Tensor,
+     output_size: _int | _size,
+ ) -> tuple[Tensor, Tensor]: ...
+ def avg_pool2d(
+     input: Tensor,
+     kernel_size: _int | _size,
+     stride: _int | _size | None = None,
+     padding: _int | _size = 0,
+     ceil_mode: bool = False,
+     count_include_pad: bool = True,
+     divisor_override: int | None = None,
+ ) -> Tensor: ...
+ def avg_pool3d(
+     input: Tensor,
+     kernel_size: _int | _size,
+     stride: _int | _size | None = None,
+     padding: _int | _size = 0,
+     ceil_mode: bool = False,
+     count_include_pad: bool = True,
+     divisor_override: int | None = None,
+ ) -> Tensor: ...
+ def binary_cross_entropy(
+     input: Tensor,
+     target: Tensor,
+     weight: Tensor | None = None,
+     reduction: str = ...,
+ ) -> Tensor: ...
+ def col2im(
+     input: Tensor,
+     output_size: _int | _size,
+     kernel_size: _int | _size,
+     dilation: _int | _size,
+     stride: _int | _size | None = None,
+     padding: _int | _size = 0,
+ ) -> Tensor: ...
+ def elu_(input: Tensor, alpha: float = ...) -> Tensor: ...
+ def fractional_max_pool2d(
+     input: Tensor,
+     kernel_size: _int | _size,
+     output_size: _int | _size,
+     _random_samples: Tensor,
+ ) -> tuple[Tensor, Tensor]: ...
+ def fractional_max_pool3d(
+     input: Tensor,
+     kernel_size: _int | _size,
+     output_size: _int | _size,
+     _random_samples: Tensor,
+ ) -> tuple[Tensor, Tensor]: ...
+ def gelu(input: Tensor, approximate: str = ...) -> Tensor: ...
+ def hardsigmoid(input: Tensor, *, out: Tensor | None = None) -> Tensor: ...
+ def hardtanh(
+     input: Tensor,
+     min_val: float = ...,
+     max_val: float = ...,
+     *,
+     out: Tensor | None = None,
+ ) -> Tensor: ...
+ def hardtanh_(
+     input: Tensor,
+     min_val: float = ...,
+     max_val: float = ...,
+ ) -> Tensor: ...
+ def leaky_relu(
+     input: Tensor,
+     negative_slope: float = ...,
+     *,
+     out: Tensor | None = None,
+ ) -> Tensor: ...
+ def leaky_relu_(input: Tensor, negative_slope: float = ...) -> Tensor: ...
+ def linear(
+     input: Tensor,
+     weight: Tensor,
+     bias: Tensor | None = None,
+ ) -> Tensor: ...
+ def log_sigmoid(input: Tensor) -> Tensor: ...
+ def one_hot(tensor: Tensor, num_classes: int = ...) -> Tensor: ...
+ def pad(
+     input: Tensor,
+     pad: Sequence[int],
+     mode: str = ...,
+     value: float | None = None,
+ ) -> Tensor: ...
+ def scaled_dot_product_attention(
+     query: Tensor,
+     key: Tensor,
+     value: Tensor,
+     attn_mask: Tensor | None = None,
+     dropout_p: float = 0.0,
+     is_causal: bool = False,
+     scale: float | None = None,
+     enable_gqa: bool = False,
+ ) -> Tensor: ...
+ def softplus(
+     input: Tensor,
+     beta: float = ...,
+     threshold: float = ...,
+ ) -> Tensor: ...
+ def softshrink(input: Tensor, lambd: float = ...) -> Tensor: ...
+
+ # Defined in aten/src/ATen/native/mkldnn/Linear.cpp
+ def mkldnn_linear(input: Tensor, weight: Tensor, bias: Tensor | None) -> Tensor: ...
+
+ # Defined at aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp
+ def mkldnn_reorder_conv2d_weight(
+     self: Tensor,
+     padding: list,
+     stride: list,
+     dilation: list,
+     groups: int,
+ ) -> Tensor: ...
+ def mkldnn_reorder_conv3d_weight(
+     self: Tensor,
+     padding: list,
+     stride: list,
+     dilation: list,
+     groups: int,
+ ) -> Tensor: ...
+
+ # Defined in aten/src/ATen/native/mkldnn/Prelu.cpp
+ def mkldnn_prelu(input: Tensor, weight: Tensor) -> Tensor: ...
+
+ # Defined at tools/autograd/templates/python_nn_functions.cpp
+ @overload
+ def _parse_to(
+     device: _device,
+     dtype: _dtype,
+     non_blocking: _bool,
+     copy: _bool,
+     *,
+     memory_format: memory_format,
+ ) -> tuple[_device, _dtype, _bool, memory_format]: ...
+ @overload
+ def _parse_to(
+     dtype: _dtype,
+     non_blocking: _bool,
+     copy: _bool,
+     *,
+     memory_format: memory_format,
+ ) -> tuple[_device, _dtype, _bool, memory_format]: ...
+ @overload
+ def _parse_to(
+     tensor: Tensor,
+     non_blocking: _bool,
+     copy: _bool,
+     *,
+     memory_format: memory_format,
+ ) -> tuple[_device, _dtype, _bool, memory_format]: ...
+
+ # Defined in aten/src/ATen/native/PackedSequence.cpp
+ def pad_sequence(
+     sequences: list[Tensor] | tuple[Tensor, ...],
+     batch_first: bool = False,
+     padding_value: float = 0.0,
+     padding_side: Literal["left", "right"] = "right",
+ ) -> Tensor: ...
+ def flatten_dense_tensors(tensors: list[Tensor]) -> Tensor: ...
+ def unflatten_dense_tensors(flat: Tensor, tensors: list[Tensor]) -> list[Tensor]: ...
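The `pad_sequence` binding above is what `torch.nn.utils.rnn.pad_sequence` dispatches to; a quick example of the batching behavior:

    import torch
    from torch.nn.utils.rnn import pad_sequence

    seqs = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
    padded = pad_sequence(seqs, batch_first=True, padding_value=0)
    print(padded)  # tensor([[1, 2, 3], [4, 5, 0]])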
phivenv/Lib/site-packages/torch/_C/_nvtx.pyi ADDED
@@ -0,0 +1,9 @@
+ # mypy: allow-untyped-defs
+ # Defined in torch/csrc/cuda/shared/nvtx.cpp
+ def rangePushA(message: str) -> int: ...
+ def rangePop() -> int: ...
+ def rangeStartA(message: str) -> int: ...
+ def rangeEnd(range_id: int) -> None: ...
+ def markA(message: str) -> None: ...
+ def deviceRangeStart(message: str, stream: int) -> object: ...
+ def deviceRangeEnd(range_handle: object, stream: int) -> None: ...
phivenv/Lib/site-packages/torch/_C/_onnx.pyi ADDED
@@ -0,0 +1,39 @@
+ # Defined in torch/csrc/onnx/init.cpp
+
+ from enum import Enum
+
+ PRODUCER_VERSION: str
+
+ class TensorProtoDataType(Enum):
+     UNDEFINED = ...
+     FLOAT = ...
+     UINT8 = ...
+     INT8 = ...
+     UINT16 = ...
+     INT16 = ...
+     INT32 = ...
+     INT64 = ...
+     STRING = ...
+     BOOL = ...
+     FLOAT16 = ...
+     DOUBLE = ...
+     UINT32 = ...
+     UINT64 = ...
+     COMPLEX64 = ...
+     COMPLEX128 = ...
+     BFLOAT16 = ...
+     FLOAT8E5M2 = ...
+     FLOAT8E4M3FN = ...
+     FLOAT8E5M2FNUZ = ...
+     FLOAT8E4M3FNUZ = ...
+
+ class OperatorExportTypes(Enum):
+     ONNX = ...
+     ONNX_ATEN = ...
+     ONNX_ATEN_FALLBACK = ...
+     ONNX_FALLTHROUGH = ...
+
+ class TrainingMode(Enum):
+     EVAL = ...
+     PRESERVE = ...
+     TRAINING = ...
phivenv/Lib/site-packages/torch/_C/_profiler.pyi ADDED
@@ -0,0 +1,246 @@
+ from enum import Enum
+ from typing import Literal
+ from typing_extensions import TypeAlias
+
+ from torch._C import device, dtype, layout
+
+ # defined in torch/csrc/profiler/python/init.cpp
+
+ class RecordScope(Enum):
+     FUNCTION = ...
+     BACKWARD_FUNCTION = ...
+     TORCHSCRIPT_FUNCTION = ...
+     KERNEL_FUNCTION_DTYPE = ...
+     CUSTOM_CLASS = ...
+     BUILD_FEATURE = ...
+     LITE_INTERPRETER = ...
+     USER_SCOPE = ...
+     STATIC_RUNTIME_OP = ...
+     STATIC_RUNTIME_MODEL = ...
+
+ class ProfilerState(Enum):
+     Disable = ...
+     CPU = ...
+     CUDA = ...
+     NVTX = ...
+     ITT = ...
+     KINETO = ...
+     KINETO_GPU_FALLBACK = ...
+     KINETO_PRIVATEUSE1_FALLBACK = ...
+     KINETO_PRIVATEUSE1 = ...
+
+ class ActiveProfilerType(Enum):
+     NONE = ...
+     LEGACY = ...
+     KINETO = ...
+     NVTX = ...
+     ITT = ...
+
+ class ProfilerActivity(Enum):
+     CPU = ...
+     CUDA = ...
+     XPU = ...
+     MTIA = ...
+     HPU = ...
+     PrivateUse1 = ...
+
+ class _EventType(Enum):
+     TorchOp = ...
+     Backend = ...
+     Allocation = ...
+     OutOfMemory = ...
+     PyCall = ...
+     PyCCall = ...
+     Kineto = ...
+
+ class _ExperimentalConfig:
+     def __init__(
+         self,
+         profiler_metrics: list[str] = ...,
+         profiler_measure_per_kernel: bool = ...,
+         verbose: bool = ...,
+         performance_events: list[str] = ...,
+         enable_cuda_sync_events: bool = ...,
+     ) -> None: ...
+
+ class ProfilerConfig:
+     def __init__(
+         self,
+         state: ProfilerState,
+         report_input_shapes: bool,
+         profile_memory: bool,
+         with_stack: bool,
+         with_flops: bool,
+         with_modules: bool,
+         experimental_config: _ExperimentalConfig,
+         trace_id: str | None = None,
+     ) -> None: ...
+
+ class _ProfilerEvent:
+     start_tid: int
+     start_time_ns: int
+     children: list[_ProfilerEvent]
+
+     # TODO(robieta): remove in favor of `self.typed`
+     extra_fields: (
+         _ExtraFields_TorchOp
+         | _ExtraFields_Backend
+         | _ExtraFields_Allocation
+         | _ExtraFields_OutOfMemory
+         | _ExtraFields_PyCall
+         | _ExtraFields_PyCCall
+         | _ExtraFields_Kineto
+     )
+
+     @property
+     def typed(
+         self,
+     ) -> (
+         tuple[Literal[_EventType.TorchOp], _ExtraFields_TorchOp]
+         | tuple[Literal[_EventType.Backend], _ExtraFields_Backend]
+         | tuple[Literal[_EventType.Allocation], _ExtraFields_Allocation]
+         | tuple[Literal[_EventType.OutOfMemory], _ExtraFields_OutOfMemory]
+         | tuple[Literal[_EventType.PyCall], _ExtraFields_PyCall]
+         | tuple[Literal[_EventType.PyCCall], _ExtraFields_PyCCall]
+         | tuple[Literal[_EventType.Kineto], _ExtraFields_Kineto]
+     ): ...
+     @property
+     def name(self) -> str: ...
+     @property
+     def tag(self) -> _EventType: ...
+     @property
+     def id(self) -> int: ...
+     @property
+     def parent(self) -> _ProfilerEvent | None: ...
+     @property
+     def correlation_id(self) -> int: ...
+     @property
+     def end_time_ns(self) -> int: ...
+     @property
+     def duration_time_ns(self) -> int: ...
+
+ class _TensorMetadata:
+     impl_ptr: int | None
+     storage_data_ptr: int | None
+     id: int | None
+
+     @property
+     def allocation_id(self) -> int | None: ...
+     @property
+     def layout(self) -> layout: ...
+     @property
+     def device(self) -> device: ...
+     @property
+     def dtype(self) -> dtype: ...
+     @property
+     def sizes(self) -> list[int]: ...
+     @property
+     def strides(self) -> list[int]: ...
+
+ Scalar: TypeAlias = int | float | bool | complex
+ Input: TypeAlias = _TensorMetadata | list[_TensorMetadata] | Scalar | None
+
+ class _ExtraFields_TorchOp:
+     name: str
+     sequence_number: int
+     allow_tf32_cublas: bool
+
+     @property
+     def inputs(self) -> list[Input]: ...
+     @property
+     def scope(self) -> RecordScope: ...
+
+ class _ExtraFields_Backend: ...
+
+ class _ExtraFields_Allocation:
+     ptr: int
+     id: int | None
+     alloc_size: int
+     total_allocated: int
+     total_reserved: int
+
+     @property
+     def allocation_id(self) -> int | None: ...
+     @property
+     def device(self) -> device: ...
+
+ class _ExtraFields_OutOfMemory: ...
+
+ class _PyFrameState:
+     line_number: int
+     function_name: str
+
+     @property
+     def file_name(self) -> str: ...
+
+ class _NNModuleInfo:
+     @property
+     def self_ptr(self) -> int: ...
+     @property
+     def cls_ptr(self) -> int: ...
+     @property
+     def cls_name(self) -> str: ...
+     @property
+     def parameters(
+         self,
+     ) -> list[tuple[str, _TensorMetadata, _TensorMetadata | None]]: ...
+
+ class _OptimizerInfo:
+     @property
+     def parameters(
+         self,
+     ) -> list[
+         tuple[
+             # Parameter
+             _TensorMetadata,
+             # Gradient (if present during optimizer.step())
+             _TensorMetadata | None,
+             # Optimizer state for Parameter as (name, tensor) pairs
+             list[tuple[str, _TensorMetadata]],
+         ]
+     ]: ...
+
+ class _ExtraFields_PyCCall:
+     @property
+     def caller(self) -> _PyFrameState: ...
+
+ class _ExtraFields_PyCall:
+     @property
+     def callsite(self) -> _PyFrameState: ...
+     @property
+     def caller(self) -> _PyFrameState: ...
+     @property
+     def module(self) -> _NNModuleInfo | None: ...
+     @property
+     def optimizer(self) -> _OptimizerInfo | None: ...
+
+ class _ExtraFields_Kineto: ...
+
+ def _add_execution_trace_observer(output_file_path: str) -> bool: ...
+ def _remove_execution_trace_observer() -> None: ...
+ def _enable_execution_trace_observer() -> None: ...
+ def _disable_execution_trace_observer() -> None: ...
+ def _set_record_concrete_inputs_enabled_val(val: bool) -> None: ...
+ def _set_fwd_bwd_enabled_val(val: bool) -> None: ...
+ def _set_cuda_sync_enabled_val(val: bool) -> None: ...
+
+ class CapturedTraceback: ...
+
+ def gather_traceback(python: bool, script: bool, cpp: bool) -> CapturedTraceback: ...
+
+ # Each dict has keys: name, filename, line
+ def symbolize_tracebacks(
+     to_symbolize: list[CapturedTraceback],
+ ) -> list[list[dict[str, str]]]: ...
+
+ class _RecordFunctionFast:
+     def __init__(
+         self,
+         name: str,
+         input_values: list | tuple | None = None,
+         keyword_values: dict | None = None,
+     ) -> None: ...
+     def __enter__(self) -> None: ...
+     def __exit__(self, *exc_info: object) -> None: ...
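`_RecordFunctionFast` above is a low-overhead way to label a region in profiler traces; a sketch using the private API (`custom_region` is a made-up label):

    import torch
    from torch._C._profiler import _RecordFunctionFast

    x = torch.randn(64, 64)
    with torch.profiler.profile() as prof:
        with _RecordFunctionFast("custom_region"):
            x @ x  # work attributed to the labeled region
    print(prof.key_averages().table(sort_by="cpu_time_total"))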
phivenv/Lib/site-packages/torch/_C/_verbose.pyi ADDED
@@ -0,0 +1,3 @@
+ # Defined in torch/csrc/utils/verbose.cpp
+ def mkl_set_verbose(enable: int) -> int: ...
+ def mkldnn_set_verbose(level: int) -> int: ...
phivenv/Lib/site-packages/torch/_C_flatbuffer/__init__.pyi ADDED
@@ -0,0 +1,11 @@
+ # mypy: allow-untyped-defs
+ from torch._C import LiteScriptModule, ScriptModule
+
+ def _load_mobile_module_from_file(filename: str): ...
+ def _load_mobile_module_from_bytes(bytes_: bytes): ...
+ def _load_jit_module_from_file(filename: str): ...
+ def _load_jit_module_from_bytes(bytes_: bytes): ...
+ def _save_mobile_module(m: LiteScriptModule, filename: str): ...
+ def _save_jit_module(m: ScriptModule, filename: str): ...
+ def _save_mobile_module_to_bytes(m: LiteScriptModule) -> bytes: ...
+ def _save_jit_module_to_bytes(m: ScriptModule) -> bytes: ...
phivenv/Lib/site-packages/torch/_awaits/__init__.py ADDED
@@ -0,0 +1,53 @@
+ from __future__ import annotations
+
+ from typing import Generic, TypeVar
+
+ import torch
+
+ __all__ = ['Await']
+
+ W = TypeVar("W")
+
+ class _PyAwaitMeta(type(torch._C._Await), type(Generic)):  # type: ignore[misc, no-redef]
+     pass
+
+ class _Await(torch._C._Await, Generic[W], metaclass=_PyAwaitMeta):
+     r"""
+     Wrapper around a ``torch._C.Await`` that encapsulates the delayed execution
+     of a callable. All manipulation happens through the functions
+     ``torch.jit._awaitable``, ``torch.jit._awaitable_wait``, and
+     ``torch.jit._awaitable_nowait``.
+
+     Torch-scriptable manipulations:
+
+     ``torch.jit._awaitable(func, *args)``
+         Creates an ``Await[W]`` object, where ``W`` is the return type of ``func``.
+
+     ``torch.jit._awaitable_wait(Await[W])``
+         Runs the function specified at ``_awaitable`` with the specified arguments.
+
+         Returns:
+             The result of type ``W`` of the function call. The result is owned by
+             ``Await[W]`` and returned on all following ``_awaitable_wait`` calls.
+
+     ``torch.jit._awaitable_nowait(W)``
+         Returns:
+             A trivial ``Await[W]`` with the specified result.
+
+     Only in eager mode:
+
+     ``fn() -> Callable[Tuple[Any], W]``
+         Returns:
+             The Python function ``func`` specified at ``_awaitable``.
+
+     ``args() -> Tuple[Any]``
+         Returns:
+             The Python args specified at ``_awaitable``.
+
+     ``is_nowait() -> _bool``
+         Returns:
+             ``True`` if this object was created via an ``_awaitable_nowait`` call
+             (a trivial ``Await[W]``).
+
+     In eager mode an ``Await[W]`` can be used as a ``W``: attributes of ``W`` can be
+     accessed on ``Await[W]``, and an ``_awaitable_wait()`` call is inserted
+     transparently.
+     """
phivenv/Lib/site-packages/torch/_awaits/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (2.03 kB).
phivenv/Lib/site-packages/torch/_custom_op/__init__.py ADDED
File without changes
phivenv/Lib/site-packages/torch/_custom_op/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (160 Bytes).
phivenv/Lib/site-packages/torch/_custom_op/__pycache__/autograd.cpython-39.pyc ADDED
Binary file (8.9 kB).
phivenv/Lib/site-packages/torch/_custom_op/__pycache__/impl.cpython-39.pyc ADDED
Binary file (21.2 kB).
phivenv/Lib/site-packages/torch/_custom_op/autograd.py ADDED
@@ -0,0 +1,307 @@
+ # mypy: allow-untyped-defs
+ import functools
+ from collections import namedtuple
+
+ import torch
+ import torch.utils._pytree as pytree
+
+
+ # NOTE [CustomOp autograd kernel indirection]
+ # We register `inner` as the autograd kernel for this custom_op.
+ # `inner` either calls the autograd formula registered by the user,
+ # or goes into an `autograd_not_implemented` kernel.
+ #
+ # The reason why this indirection exists is
+ # so that we can swap out the autograd kernel (the PyTorch dispatcher
+ # doesn't actually allow us to do this). By default, we want
+ # the `autograd_not_implemented` behavior, but then the user may come
+ # and register something that is actually a backward formula
+ def autograd_kernel_indirection(custom_op):
+     autograd_fallback = autograd_not_implemented(custom_op)
+
+     def inner(*args, **kwargs):
+         if custom_op._has_impl("autograd"):
+             kernel = custom_op._get_impl("autograd").func
+             return kernel(*args, **kwargs)
+         # As explained in NOTE ["backward", "save_for_backward", and "autograd"],
+         # after the user gives us "backward" and "save_for_backward", we generate
+         # the "autograd" impl. If the user only provided one, then we tell
+         # the user they've done something wrong.
+         if custom_op._has_impl("save_for_backward") or custom_op._has_impl("backward"):
+             missing = (
+                 "save_for_backward" if custom_op._has_impl("backward") else "backward"
+             )
+             found = "save_for_backward" if missing == "backward" else "backward"
+             loc = custom_op._get_impl(found).location
+             raise RuntimeError(
+                 f"We found a '{found}' registration for {custom_op} at "
+                 f"{loc} but were unable to find a '{missing}' registration. "
+                 f"To use the CustomOp API to register a backward formula, "
+                 f"please provide us both a backward function and a "
+                 f"'save for backward' function via `impl_backward` and "
+                 f"`impl_save_for_backward` respectively."
+             )
+         return autograd_fallback(*args, **kwargs)
+
+     return inner
+
+
+ # TODO(#101191): Use the actual C++ autograd not implemented fallback,
+ # or change the default autograd fallback to the autograd not implemented fallback.
+ def autograd_not_implemented(custom_op):
+     def kernel(*args, **kwargs):
+         if torch.is_grad_enabled() and pytree.tree_any(
+             lambda x: isinstance(x, torch.Tensor) and x.requires_grad, (args, kwargs)
+         ):
+             raise RuntimeError("Autograd has not been implemented for operator")
+         with torch._C._AutoDispatchBelowAutograd():
+             return custom_op(*args, **kwargs)
+
+     return kernel
+
+
+ def mark_non_differentiable(ctx, output, output_differentiability):
+     # Output types are restricted to be:
+     #   - Tensor
+     #   - Tensor[]
+     #   - int, bool, Scalar, float
+     # See _check_can_register_backward
+     if output_differentiability is not None:
+         if not isinstance(output, tuple):
+             tuple_output = (output,)
+         else:
+             tuple_output = output  # type: ignore[assignment]
+         assert len(output_differentiability) == len(tuple_output)
+         non_differentiable_tensors = []
+         for idx, (differentiable, out) in enumerate(
+             zip(output_differentiability, tuple_output)
+         ):
+             if isinstance(out, torch.Tensor):
+                 if not differentiable:
+                     non_differentiable_tensors.append(out)
+                 continue
+             if isinstance(out, list):
+                 if not differentiable:
+                     non_differentiable_tensors.extend(out)
+                 continue
+             if differentiable:
+                 raise RuntimeError(
+                     f"With output_differentiability={output_differentiability}. "
+                     f"At idx {idx}, we received an object of type {type(out)} that "
+                     f"is not a Tensor, so it cannot be marked as differentiable in "
+                     f"output_differentiability."
+                 )
+         if non_differentiable_tensors:
+             ctx.mark_non_differentiable(*non_differentiable_tensors)
+
+
+ def construct_autograd_kernel(
+     schema,
+     output_differentiability,
+     custom_op,
+     op_overload,
+     save_for_backward_fn,
+     backward_fn,
+ ):
+     def apply(*args):
+         flat_args, spec = pytree.tree_flatten(args)
+         out_spec = None
+
+         def forward(ctx, *flat_args):
+             ctx.set_materialize_grads(True)
+             args = pytree.tree_unflatten(list(flat_args), spec)
+             with torch._C._AutoDispatchBelowAutograd():
+                 output = op_overload(*args)
+
+             # We use the info about args to give better error messages in backward
+             args_info = namedtuple_args(schema, pytree.tree_map(type, args))
+
+             save_for_backward_fn_inputs = namedtuple_args(schema, args)
+             to_save = save_for_backward_fn(save_for_backward_fn_inputs, output)
+
+             save_pytree_for_backward(ctx, (to_save, args_info))
+             mark_non_differentiable(ctx, output, output_differentiability)
+
+             nonlocal out_spec
+             flat_output, out_spec = pytree.tree_flatten(output)
+             return tuple(flat_output)
+
+         def backward(ctx, *flat_grad_output):
+             assert out_spec is not None
+             grads = pytree.tree_unflatten(list(flat_grad_output), out_spec)
+             saved, args_info = unpack_saved(ctx)
+             # There is nothing on the ctx object for now, it is just there so
+             # that we can add additional things in the future.
+             inner_ctx = object()
+             if not isinstance(grads, tuple):
+                 grads = (grads,)
+             grad_inputs_dict = backward_fn(inner_ctx, saved, *grads)
+
+             # Massage the grad_inputs_dict to a form acceptable by
+             # autograd.Function.
+             validate_grad_inputs_dict(grad_inputs_dict, custom_op, args_info)
+             return grad_inputs_dict_to_flat_tuple(grad_inputs_dict, args_info)
+
+         generated_cls = gen_autograd_function(
+             custom_op._opname + "_customop", forward, backward
+         )
+
+         flat_output = generated_cls.apply(*flat_args)
+         assert out_spec is not None
+         return pytree.tree_unflatten(list(flat_output), out_spec)
+
+     return apply
+
+
+ def gen_autograd_function(name, forward, backward):
+     generated_cls = type(
+         name,
+         (torch.autograd.Function,),
+         {
+             "forward": staticmethod(forward),
+             "backward": staticmethod(backward),
+         },
+     )
+     return generated_cls
+
+
+ @functools.lru_cache
+ def namedtuple_args_cls(schema):
+     attribs = [arg.name for arg in schema.arguments.flat_all]
+     name = str(schema.name) + "_args"
+     # mypy doesn't support dynamic namedtuple name
+     tuple_cls = namedtuple(name, attribs)  # type: ignore[misc]
+     return tuple_cls
+
+
+ def namedtuple_args(schema, args):
+     assert isinstance(args, tuple)
+     tuple_cls = namedtuple_args_cls(schema)
+     return tuple_cls(*args)
+
+
+ def validate_grad_inputs_dict(grad_inputs_dict, forward_op, args_info):
+     def error(what):
+         backward = forward_op._get_impl("backward")
+         raise RuntimeError(
+             f"In the backward function defined for {forward_op} at "
+             f"{backward.location} using the CustomOp API, {what}"
+         )
+
+     if not isinstance(grad_inputs_dict, dict):
+         error(
+             f"expected the output of the backward function to be a dict but "
+             f"got {type(grad_inputs_dict)}"
+         )
+
+     expected_keys = {
+         arg.name
+         for arg in forward_op._schema.arguments.flat_all
+         if arg.type.is_tensor_like()
+     }
+     actual_keys = grad_inputs_dict.keys()
+     if expected_keys != actual_keys:
+         error(
+             f"expected the returned grad_input dict to have keys "
+             f"{expected_keys} but got {actual_keys}. The backward "
+             f"function must return a gradient (can be None) for each arg "
+             f"to the CustomOp that may be a Tensor or Sequence[Tensor]. "
+             f"Args declared to be non-Tensor-like types should not appear "
+             f"in the grad_input dict"
+         )
+
+     for name, grad in grad_inputs_dict.items():
+         arg_info = getattr(args_info, name)
+
+         if isinstance(arg_info, list):
+             if not isinstance(grad, (tuple, list)):
+                 error(
+                     f"for input '{name}' expected the grad_input dict to "
+                     f"hold a list of gradients but got object of type "
+                     f"{type(grad)}."
+                 )
+             if not len(grad) == len(arg_info):
+                 error(
+                     f"for input '{name}' expected the grad_input dict to "
+                     f"hold a list of {len(arg_info)} gradients but got "
+                     f"{len(grad)}"
+                 )
+             for idx, (g, info) in enumerate(zip(grad, arg_info)):
+                 if g is None:
+                     continue
+                 if not isinstance(g, torch.Tensor):
+                     error(
+                         f"for input '{name}' expected the grad_input dict to "
+                         f"hold a list of None or Tensor gradients but got "
+                         f"object of {type(g)} at index {idx}"
+                     )
+                 if not issubclass(info, torch.Tensor):
+                     error(
+                         f"for input '{name}', got a Tensor as the gradient "
+                         f"for the {idx}-th value but expected None because "
+                         f"the {idx}-th value was not a Tensor (it was "
+                         f"type {arg_info})"
+                     )
+             continue
+
+         if grad is None:
+             continue
+         if not isinstance(grad, torch.Tensor):
+             error(
+                 f"got object of type {type(grad)} as the gradient for input "
+                 f"'{name}', "
+                 f"but expected the gradient to be either None or a Tensor"
+             )
+         if not issubclass(arg_info, torch.Tensor):
+             error(
+                 f"got a Tensor as the gradient for input '{name}' but "
+                 f"expected None as the gradient because input '{name}' "
+                 f"was not a Tensor (it was type {arg_info})."
+             )
+
+
+ def grad_inputs_dict_to_flat_tuple(grad_inputs_dict, args_info):
+     result = []
+     for name, arg_info in args_info._asdict().items():
+         if name not in grad_inputs_dict:
+             result.append(pytree.tree_map(lambda x: None, arg_info))
+             continue
+         result.append(grad_inputs_dict[name])
+     return tuple(pytree.tree_leaves(result))
+
+
+ # Saves "stuff" (a pytree) onto the ctx object. Use unpack_saved to unpack it.
+ # autograd.Function prefers that users use ctx.save_for_backward to
+ # save Tensors (to avoid reference cycles) and for non-Tensors to go onto the
+ # ctx object.
+ def save_pytree_for_backward(ctx, stuff):
+     flat_stuff, spec = pytree.tree_flatten(stuff)
+     num_elts = len(flat_stuff)
+     tensor_idxs = [
+         idx for idx, thing in enumerate(flat_stuff) if isinstance(thing, torch.Tensor)
+     ]
+     non_tensor_idxs = [
+         idx
+         for idx, thing in enumerate(flat_stuff)
+         if not isinstance(thing, torch.Tensor)
+     ]
+     tensors = [thing for thing in flat_stuff if isinstance(thing, torch.Tensor)]
+     non_tensors = [thing for thing in flat_stuff if not isinstance(thing, torch.Tensor)]
+
+     ctx.spec = spec
+     ctx.num_elts = num_elts
+     ctx.save_for_backward(*tensors)
+     ctx.tensor_idxs = tensor_idxs
+     ctx.saved_non_tensors = non_tensors
+     ctx.non_tensor_idxs = non_tensor_idxs
+
+
+ # Inverse operation to save_pytree_for_backward
+ def unpack_saved(ctx):
+     flat_stuff = [None] * ctx.num_elts
+     for tensor, idx in zip(ctx.saved_tensors, ctx.tensor_idxs):
+         flat_stuff[idx] = tensor
+     for non_tensor, idx in zip(ctx.saved_non_tensors, ctx.non_tensor_idxs):
+         flat_stuff[idx] = non_tensor
+     stuff = pytree.tree_unflatten(flat_stuff, ctx.spec)
+     return stuff
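A sketch of the `save_pytree_for_backward` / `unpack_saved` pair from this file used in a hand-written `autograd.Function` (`Scale` is a made-up example; the generated kernels above use the pair the same way):

    import torch
    from torch._custom_op.autograd import save_pytree_for_backward, unpack_saved

    class Scale(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x, factor):
            # Tensors in the pytree go through ctx.save_for_backward;
            # the non-Tensor `factor` is stashed on the ctx object directly.
            save_pytree_for_backward(ctx, {"factor": factor})
            return x * factor

        @staticmethod
        def backward(ctx, grad):
            saved = unpack_saved(ctx)
            return grad * saved["factor"], None

    x = torch.randn(3, requires_grad=True)
    Scale.apply(x, 2.0).sum().backward()
    print(x.grad)  # every element equals 2.0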
phivenv/Lib/site-packages/torch/_custom_op/impl.py ADDED
@@ -0,0 +1,715 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import dataclasses
3
+ import functools
4
+ import inspect
5
+ import sys
6
+ import typing
7
+ import warnings
8
+ import weakref
9
+
10
+ import torch
11
+ import torch._C as _C
12
+ import torch._library.infer_schema
13
+ import torch.library as library
14
+ from torch._library.infer_schema import infer_schema
15
+ from torch.library import get_ctx
16
+ from torchgen.model import (
17
+ BaseTy,
18
+ BaseType,
19
+ FunctionSchema,
20
+ ListType,
21
+ OperatorName,
22
+ SchemaKind,
23
+ )
24
+
25
+ from .autograd import autograd_kernel_indirection, construct_autograd_kernel
26
+
27
+
28
+ """
29
+ torch._custom_op is deprecated. We shipped a production-ready version of it into torch.library.
30
+ Please use those APIs instead.
31
+ """
32
+
33
+ __all__ = ["custom_op", "CustomOp", "get_ctx"]
34
+
35
+
36
+ SUPPORTED_DEVICE_TYPE_TO_KEY = {
37
+ "cpu": "CPU",
38
+ "cuda": "CUDA",
39
+ }
40
+
41
+ # We will not let users register CustomOps with anything that could look like
42
+ # PyTorch internals to avoid confusion.
43
+ RESERVED_NS = {
44
+ "prim",
45
+ "prims",
46
+ "aten",
47
+ "at",
48
+ "torch",
49
+ "pytorch",
50
+ }
51
+
52
+
53
+ def warn_deprecated():
54
+ warnings.warn(
55
+ "torch._custom_op is deprecated and will be removed in PyTorch 2.6, please "
56
+ "use the equivalent torch.library API instead.",
57
+ DeprecationWarning,
58
+ )
59
+
60
+
61
+ def custom_op(
62
+ qualname: str, manual_schema: typing.Optional[str] = None
63
+ ) -> typing.Callable:
64
+ r"""
65
+ This API is deprecated, please use torch.library.custom_op instead
66
+ """
67
+ warn_deprecated()
68
+
69
+ def inner(func):
70
+ if not inspect.isfunction(func):
71
+ raise ValueError(
72
+ f"custom_op(...)(func): Expected `func` to be a Python "
73
+ f"function, got: {type(func)}"
74
+ )
75
+
76
+ ns, name = parse_qualname(qualname)
77
+ validate_namespace(ns)
78
+ if func.__name__ != name:
79
+ raise ValueError(
80
+ f"custom_op(qualname='{qualname}', ...)(func): expected `func` "
81
+ f"to have name '{name}' but got '{func.__name__}'. "
82
+ f"Please either change the name of `func` or the qualname that "
83
+ f"is passed to `custom_op`"
84
+ )
85
+
86
+ schema = (
87
+ infer_schema(func, mutates_args=())
88
+ if manual_schema is None
89
+ else manual_schema
90
+ )
91
+ schema_str = f"{name}{schema}"
92
+ function_schema = FunctionSchema.parse(schema_str)
93
+ validate_schema(function_schema)
94
+ if manual_schema is not None:
95
+ validate_function_matches_schema(function_schema, func)
96
+
97
+ lib = library.Library(ns, "FRAGMENT")
98
+ lib.define(schema_str)
99
+ ophandle = find_ophandle_or_throw(ns, function_schema.name)
100
+ result = CustomOp(
101
+ lib, ns, function_schema, name, ophandle, _private_access=True
102
+ )
103
+
104
+ result.__name__ = func.__name__
105
+ result.__module__ = func.__module__
106
+ result.__doc__ = func.__doc__
107
+
108
+ library.impl(lib, result._opname, "Autograd")(
109
+ autograd_kernel_indirection(weakref.proxy(result))
110
+ )
111
+
112
+ torch._C._dispatch_set_report_error_callback(
113
+ ophandle, functools.partial(report_error_callback, weakref.proxy(result))
114
+ )
115
+
116
+ return result
117
+
118
+ return inner
119
+
120
+
121
+ # Global dictionary holding references to all CustomOp objects
122
+ # Yes, it keeps all CustomOps alive (see NOTE [CustomOp lifetime])
123
+ # Used to query the CustomOp associated with a specific C++ dispatcher operator.
124
+ # An example usage is FakeTensor: FakeTensor checks if a specific operator
125
+ # has an implementation registered via the CustomOp API.
126
+ # Indexed by qualname (e.g. aten::foo)
127
+ global_registry: dict[str, "CustomOp"] = {}
128
+
129
+
130
+ class CustomOp:
131
+ r"""
132
+ This API is deprecated, please use torch.library.custom_op instead
133
+ """
134
+
135
+ def __init__(
136
+ self, lib, cpp_ns, schema, operator_name, ophandle, *, _private_access=False
137
+ ):
138
+ super().__init__()
139
+ warn_deprecated()
140
+ if not _private_access:
141
+ raise RuntimeError(
142
+ "The CustomOp constructor is private and we do not guarantee "
143
+ "BC for it. Please use custom_op(...) to create a CustomOp object"
144
+ )
145
+ name = f"{cpp_ns}::{operator_name}"
146
+ self._schema = schema
147
+ self._cpp_ns = cpp_ns
148
+ self._lib: library.Library = lib
149
+ self._ophandle: _C._DispatchOperatorHandle = ophandle
150
+ # Has the name of the op, e.g. "foo". We cache here for convenience.
151
+ self._opname: str = operator_name
152
+ # this is _opname but with namespace. e.g. "custom::foo"
153
+ self._qualname: str = name
154
+ self.__name__ = None # mypy requires this
155
+ # NB: Some of these impls are registered as kernels to DispatchKeys.
156
+ # Modifying the _impls dict directly won't do anything in that case.
157
+ self._impls: dict[str, typing.Optional[FuncAndLocation]] = {}
158
+ # See NOTE [CustomOp autograd kernel indirection]
159
+ self._registered_autograd_kernel_indirection = False
160
+
161
+ global_registry[self._qualname] = self
162
+
163
+ def _register_autograd_kernel_indirection(self):
164
+ assert not self._registered_autograd_kernel_indirection
165
+ self._lib.impl(
166
+ self._opname, autograd_kernel_indirection(weakref.proxy(self)), "Autograd"
167
+ )
168
+ self._registered_autograd_kernel_indirection = True
169
+
170
+ # Records the impl and the source location in self._impls
171
+ # Note that this doesn't cause torch.library to use the impl, that
172
+ # needs to be done in a separate self._lib.impl call.
173
+ def _register_impl(self, kind, func, stacklevel=2):
174
+ if self._has_impl(kind):
175
+ func_and_location = self._impls[kind]
176
+ assert func_and_location is not None # Pacify mypy
177
+ location = func_and_location.location
178
+ raise RuntimeError(
179
+ f"Attempting to register a {kind} impl for operator {self._qualname} "
180
+ f"that already has a {kind} impl registered from Python at "
181
+ f"{location}. This is not supported."
182
+ )
183
+ frame = inspect.getframeinfo(sys._getframe(stacklevel))
184
+ location = f"{frame.filename}:{frame.lineno}"
185
+ self._impls[kind] = FuncAndLocation(func, location)
186
+
187
+ def _get_impl(self, kind):
188
+ return self._impls[kind]
189
+
190
+ def _has_impl(self, kind):
191
+ return kind in self._impls
192
+
193
+ def _destroy(self):
194
+ # NOTE: [CustomOp lifetime]
195
+ # A CustomOp, once created, lives forever. The mechanism is that the
196
+ # global registry holds a reference to it. However, to make testing
197
+ # easier, we want to be able to destroy CustomOp objects.
198
+ # CustomOp._destroy does the job, though it leaves the CustomOp
199
+ # in a garbage state.
200
+ del self._lib
201
+
202
+ opnamespace = getattr(torch.ops, self._cpp_ns)
203
+ if hasattr(opnamespace, self._opname):
204
+ delattr(opnamespace, self._opname)
205
+
206
+ del global_registry[self._qualname]
207
+
208
+ def __repr__(self):
209
+ return f'<CustomOp(op="{self._qualname}")>'
210
+
211
+ def __call__(self, *args, **kwargs):
212
+ # Bypass torch.ops.* and directly do OperatorHandle::callBoxed.
213
+ # Using torch.ops.* is a bit of a pain (it can be slow and it has lifetime
214
+ # issues from caching operators that make testing CustomOp difficult).
215
+ result = _C._dispatch_call_boxed(self._ophandle, *args, **kwargs)
216
+ return result
217
+
218
+ def impl(
219
+ self,
220
+ device_types: typing.Union[str, typing.Iterable[str]],
221
+ _stacklevel=2,
222
+ ) -> typing.Callable:
223
+ r"""
224
+ This API is deprecated, please use torch.library.custom_op instead
225
+ """
226
+ if isinstance(device_types, str):
227
+ device_types = [device_types]
228
+ for device_type in device_types:
229
+ validate_device_type(device_type)
230
+
231
+ def inner(f):
232
+ for device_type in set(device_types):
233
+ self._check_doesnt_have_library_impl(device_type)
234
+ self._register_impl(device_type, f, stacklevel=_stacklevel)
235
+ dispatch_key = SUPPORTED_DEVICE_TYPE_TO_KEY[device_type]
236
+ library.impl(self._lib, self._opname, dispatch_key)(f)
237
+ return f
238
+
239
+ return inner
240
+
241
+ def _check_doesnt_have_library_impl(self, device_type):
242
+ if self._has_impl(device_type):
243
+ return
244
+ key = SUPPORTED_DEVICE_TYPE_TO_KEY[device_type]
245
+ if _C._dispatch_has_computed_kernel_for_dispatch_key(self._qualname, key):
246
+ raise RuntimeError(
247
+ f"impl(..., device_types={device_type}): the operator {self._qualname} "
248
+ f"already has an implementation for this device type via a "
249
+ f"pre-existing torch.library or TORCH_LIBRARY registration."
250
+ )
251
+
252
+ def impl_factory(self) -> typing.Callable:
253
+ r"""Register an implementation for a factory function."""
254
+
255
+ def inner(f):
256
+ self._register_impl("factory", f)
257
+ library.impl(self._lib, self._opname, "BackendSelect")(f)
258
+ return f
259
+
260
+ return inner
261
+
262
+ def impl_abstract(self, _stacklevel=2) -> typing.Callable:
263
+ r"""
264
+ This API is deprecated, please use torch.library.custom_op instead
265
+ """
266
+
267
+ def inner(f):
268
+ self._check_doesnt_have_library_meta_impl()
269
+ self._register_impl("abstract", f, stacklevel=_stacklevel)
270
+ location = self._get_impl("abstract").location
271
+
272
+ qualname = self._qualname
273
+
274
+ # Handle DispatchKey.Meta registration
275
+ @functools.wraps(f)
276
+ def f_with_ctx(*args, **kwargs):
277
+ def error_on_ctx():
278
+ raise RuntimeError(
279
+ f"Attempted to call get_ctx() for the meta implementation "
280
+ f"for {qualname}."
281
+ f"You have presumably called get_ctx() because the operator "
282
+ f"has a data-dependent output shape; if so, there is no "
283
+ f"such meta implementation and this error is the correct "
284
+ f"behavior. Otherwise, please remove the call to get_ctx() "
285
+ f"in the implementation registered with impl_abstract "
286
+ f"at {location}"
287
+ )
288
+
289
+ with torch._library.fake_impl.set_ctx_getter(error_on_ctx):
290
+ return f(*args, **kwargs)
291
+
292
+ self._lib.impl(self._opname, f_with_ctx, "Meta")
293
+ return f
294
+
295
+ return inner
296
+
297
+ def _check_can_register_backward(self):
298
+ def error(detail):
299
+ raise RuntimeError(
300
+ f"Cannot use torch._custom_ops APIs to register backward "
301
+ f"formula for {detail}. Got operator "
302
+ f"{self._qualname} with schema: {schema}"
303
+ )
304
+
305
+ schema = self._schema
306
+ if schema.kind() != SchemaKind.functional:
307
+ error("non-functional operator")
308
+
309
+ rets = schema.returns
310
+ if not schema.returns:
311
+ error("operator with no returns")
312
+
313
+ assert len(rets) > 0
314
+ is_non_mutating_view = any(
315
+ r.annotation is not None and not r.annotation.is_write for r in rets
316
+ )
317
+ if is_non_mutating_view:
318
+ error("operator that returns views")
319
+
320
+ # We make assumptions about the schema's return types.
321
+ allowed_return_types = {
322
+ BaseType(BaseTy.int): "int",
323
+ BaseType(BaseTy.SymInt): "SymInt",
324
+ BaseType(BaseTy.bool): "bool",
325
+ BaseType(BaseTy.float): "float",
326
+ BaseType(BaseTy.Tensor): "Tensor",
327
+ ListType(BaseType(BaseTy.Tensor), None): "List[Tensor]",
328
+ }
329
+ for ret in schema.returns:
330
+ if ret.type in allowed_return_types:
331
+ continue
332
+ error(
333
+ f"operator with return not in {list(allowed_return_types.values())} (got {ret.type})"
334
+ )
335
+
336
+ def _check_doesnt_have_library_autograd_impl(self):
337
+ if self._registered_autograd_kernel_indirection:
338
+ return
339
+
340
+ if _C._dispatch_has_kernel_for_dispatch_key(
341
+ self._qualname, "CompositeImplicitAutograd"
342
+ ):
343
+ raise RuntimeError(
344
+ f"impl_backward/impl_save_for_backward: the operator {self._qualname} "
345
+ f"already has an implementation for this device type via a "
346
+ f"pre-existing registration to DispatchKey::CompositeImplicitAutograd."
347
+ f"CompositeImplicitAutograd operators do not need an autograd formula; "
348
+ f"instead, the operator will decompose into its constituents and those "
349
+ f"can have autograd formulas defined on them."
350
+ )
351
+
352
+ # We can improve this by adding "all Autograd<BACKEND> keys", but
353
+ # realistically people will just be using this API for CPU/CUDA for now.
354
+ for key in ["Autograd", "AutogradCPU", "AutogradCUDA"]:
355
+ if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, key):
356
+ raise RuntimeError(
357
+ f"impl_backward/impl_save_for_backward: "
358
+ f"the operator {self._qualname} already has an Autograd kernel "
359
+ f"registered to DispatchKey::{key} vi a pre-existing "
360
+ f"torch.library or TORCH_LIBRARY registration. Please either "
361
+ f"remove those registrations or don't use the torch._custom_ops APIs"
362
+ )
363
+
364
+ def _check_doesnt_have_library_meta_impl(self):
365
+ if self._has_impl("abstract"):
366
+ return
367
+
368
+ # If the user's operator is CompositeExplicitAutograd,
369
+ # allow them to impl_abstract. This is being pragmatic
370
+ # (existing custom ops may have CompositeExplicitAutograd
371
+ # registrations that don't work with Meta kernels, so this
372
+ # gives them an escape hatch).
373
+ if _C._dispatch_has_kernel_for_dispatch_key(
374
+ self._qualname, "CompositeExplicitAutograd"
375
+ ) and not _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "Meta"):
376
+ return
377
+
378
+ # Otherwise, if the user already has a Meta kernel or their
379
+ # op is CompositeImplicitAutograd or some other alias dispatch key,
380
+ # raise.
381
+
382
+ # Special case for CompositeImplicitAutograd
383
+ if _C._dispatch_has_kernel_for_dispatch_key(
384
+ self._qualname, "CompositeImplicitAutograd"
385
+ ):
386
+ raise RuntimeError(
387
+ f"impl_abstract(...): the operator {self._qualname} "
388
+ f"already has an implementation for this device type via a "
389
+ f"pre-existing registration to DispatchKey::CompositeImplicitAutograd."
390
+ f"CompositeImplicitAutograd operators do not need an abstract impl; "
391
+ f"instead, the operator will decompose into its constituents and those "
392
+ f"can have abstract impls defined on them."
393
+ )
394
+
395
+ if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "Meta"):
396
+ raise RuntimeError(
397
+ f"impl_abstract(...): the operator {self._qualname} "
398
+ f"already has an DispatchKey::Meta implementation via a "
399
+ f"pre-existing torch.library or TORCH_LIBRARY registration. "
400
+ f"Please either remove that registration or don't call impl_abstract."
401
+ )
402
+
403
+ # NOTE ["backward", "save_for_backward", and "autograd"]
404
+ # As a part of the explicit autograd API, a user must provide us
405
+ # a "save_for_backward" function and a "backward" function.
406
+ # When both of these have been provided, then we automatically
407
+ # construct the "autograd" kernel.
408
+ def _register_autograd_kernel(self):
409
+ assert self._has_impl("backward")
410
+ assert self._has_impl("save_for_backward")
411
+ kernel = construct_autograd_kernel(
412
+ self._schema,
413
+ self._output_differentiability,
414
+ self,
415
+ get_op(self._qualname),
416
+ self._get_impl("save_for_backward").func,
417
+ self._get_impl("backward").func,
418
+ )
419
+ self._register_impl("autograd", kernel)
420
+
421
+ def impl_save_for_backward(self, _stacklevel=2):
422
+ r"""Register a function that tells us what to save for backward.
423
+
424
+ Please see impl_backward for more details.
425
+ """
426
+
427
+ def inner(f):
428
+ self._check_can_register_backward()
429
+ self._check_doesnt_have_library_autograd_impl()
430
+ if not self._registered_autograd_kernel_indirection:
431
+ self._register_autograd_kernel_indirection()
432
+ self._register_impl("save_for_backward", f, stacklevel=_stacklevel)
433
+ if self._has_impl("backward"):
434
+ self._register_autograd_kernel()
435
+
436
+ return inner
437
+
438
+ def impl_backward(self, output_differentiability=None, _stacklevel=2):
439
+ r"""
440
+ This API is deprecated, please use torch.library.custom_op instead
441
+ """
442
+ if output_differentiability is not None:
443
+
444
+ def yell():
445
+ raise RuntimeError(
446
+ f"impl_backward(output_differentiability): expected "
447
+ f"output_differentiability to be a list of bools with "
448
+ f"length equal to the number of outputs of this CustomOp "
449
+ f"got: {output_differentiability}"
450
+ )
451
+
452
+ if not isinstance(output_differentiability, list):
453
+ yell()
454
+ for diff in output_differentiability:
455
+ if not isinstance(diff, bool):
456
+ yell()
457
+ if len(self._schema.returns) != len(output_differentiability):
458
+ yell()
459
+
460
+ def inner(f):
461
+ self._check_can_register_backward()
462
+ self._check_doesnt_have_library_autograd_impl()
463
+ if not self._registered_autograd_kernel_indirection:
464
+ self._register_autograd_kernel_indirection()
465
+ self._register_impl("backward", f, stacklevel=_stacklevel)
466
+ self._output_differentiability = output_differentiability
467
+ if self._has_impl("save_for_backward"):
468
+ self._register_autograd_kernel()
469
+
470
+ return inner
471
+
472
+
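+ # Editor's sketch (hypothetical; "mylib::mysin" and both function bodies are
+ # made up) of the save_for_backward/backward pairing described in
+ # NOTE ["backward", "save_for_backward", and "autograd"] above:
+ #
+ #     op = _find_custom_op("mylib::mysin")
+ #
+ #     @op.impl_save_for_backward()
+ #     def mysin_save_for_backward(inputs, output):
+ #         return inputs.x  # save only what backward needs
+ #
+ #     @op.impl_backward()
+ #     def mysin_backward(ctx, saved, grad):
+ #         return {"x": grad * saved.cos()}
+ #
+ # Once both are registered, _register_autograd_kernel() runs automatically.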
473
+ @dataclasses.dataclass
474
+ class FuncAndLocation:
475
+ func: typing.Callable
476
+ location: str
477
+
478
+
479
+ def find_ophandle_or_throw(cpp_ns: str, operator_name: OperatorName):
480
+ overload_name = (
481
+ "" if operator_name.overload_name is None else operator_name.overload_name
482
+ )
483
+ return _C._dispatch_find_schema_or_throw(
484
+ f"{cpp_ns}::{str(operator_name.name)}", overload_name
485
+ )
486
+
487
+
488
+ def validate_namespace(ns: str) -> None:
489
+ if "." in ns:
490
+ raise ValueError(
491
+ f'custom_op(..., ns="{ns}"): expected ns to not contain any . (and be a '
492
+ f"valid variable name)"
493
+ )
494
+ if ns in RESERVED_NS:
495
+ raise ValueError(
496
+ f"custom_op(..., ns='{ns}'): '{ns}' is a reserved namespace, "
497
+ f"please choose something else. "
498
+ )
499
+
500
+
501
+ def validate_schema(schema: FunctionSchema) -> None:
502
+ if not torch._library.utils.is_functional_schema(schema):
503
+ raise ValueError(
504
+ f"custom_op only supports functional operators "
505
+ f"(ops that do not mutate any inputs, do not return "
506
+ f"views of the inputs, and has at least one return). "
507
+ f"Got the following non-functional schema: {schema}"
508
+ )
509
+
510
+ # For simplicity: don't allow self arguments
511
+ if schema.arguments.self_arg is not None:
512
+ raise ValueError(
513
+ f"custom_op does not support arguments named 'self'. Please "
514
+ f"rename your argument. Got: {schema}"
515
+ )
516
+
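+ # Editor's note (illustrative schemas only): validate_schema above would
+ # reject "foo(Tensor(a!) x) -> Tensor" (mutates x) and
+ # "foo(Tensor self) -> Tensor" (argument named self).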
517
+
518
+ def parse_qualname(qualname: str) -> tuple[str, str]:
519
+ names = qualname.split("::", 1)
520
+ if len(names) != 2:
521
+ raise ValueError(
522
+ f"Expected there to be a namespace in {qualname}, i.e. The "
523
+ f"operator name should look something like ns::foo"
524
+ )
525
+ if "." in names[1]:
526
+ raise ValueError(
527
+ f"The torch.custom_ops APIs do not handle overloads, "
528
+ f"i.e. operator names with '.' in them. "
529
+ f"Please name your operator something like ns::foo. "
530
+ f"Got: {qualname}"
531
+ )
532
+ return names[0], names[1]
533
+
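+ # For illustration (hypothetical names): parse_qualname("mylib::foo") returns
+ # ("mylib", "foo"); "foo" (no namespace) and "mylib::foo.out" (an overload)
+ # both raise ValueError.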
534
+
535
+ def validate_device_type(device_type: str) -> None:
536
+ if device_type not in SUPPORTED_DEVICE_TYPE_TO_KEY:
537
+ raise ValueError(
538
+ f"CustomOp.impl(device_types=[{device_type}, ...]): we only support device_type "
539
+ f"in {SUPPORTED_DEVICE_TYPE_TO_KEY.keys()}."
540
+ )
541
+
542
+
543
+ def supported_param(param: inspect.Parameter) -> bool:
544
+ return param.kind in (
545
+ inspect.Parameter.POSITIONAL_OR_KEYWORD,
546
+ inspect.Parameter.KEYWORD_ONLY,
547
+ )
548
+
549
+
550
+ def validate_function_matches_schema(
551
+ schema: FunctionSchema, func: typing.Callable
552
+ ) -> None:
553
+ sig = inspect.signature(func)
554
+
555
+ if not all(supported_param(p) for _, p in sig.parameters.items()):
556
+ raise ValueError(
557
+ f"custom_op(..., manual_schema)(func): positional-only args, "
558
+ f"varargs, and kwargs are not supported. Please rewrite `func` "
559
+ f"to not have them. Got `func` with signature: {sig}"
560
+ )
561
+
562
+ if (
563
+ any(
564
+ p.annotation is not inspect.Parameter.empty
565
+ for _, p in sig.parameters.items()
566
+ )
567
+ or sig.return_annotation is not inspect.Signature.empty
568
+ ):
569
+ raise ValueError(
570
+ f"custom_op(..., manual_schema)(func): When passing in a manual "
571
+ f"schema, we expect `func` to have no type annotations to avoid "
572
+ f"ambiguity. Got `func` with signature: {sig}"
573
+ )
574
+
575
+ positional = [
576
+ (name, param)
577
+ for name, param in sig.parameters.items()
578
+ if param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
579
+ ]
580
+ kwargonly = [
581
+ (name, param)
582
+ for name, param in sig.parameters.items()
583
+ if param.kind == inspect.Parameter.KEYWORD_ONLY
584
+ ]
585
+
586
+ def error():
587
+ raise ValueError(
588
+ f"custom_op(..., manual_schema)(func): When passing in a manual "
589
+ f"schema, we expect `func`'s signature to match `manual_schema` "
590
+ f"(aside from type annotations). "
591
+ f"func's signature: {sig}, manual_schema: {schema}"
592
+ )
593
+
594
+ def error_default_args():
595
+ raise ValueError(
596
+ f"custom_op(..., manual_schema)(func): "
597
+ f"neither func nor manual_schema should have default "
598
+ f"arguments. Got "
599
+ f"func's signature: {sig}, manual_schema: {schema}"
600
+ )
601
+
602
+ def compare(sig_args, schema_args):
603
+ if len(sig_args) != len(schema_args):
604
+ error()
605
+ for (name, param), arg in zip(sig_args, schema_args):
606
+ if name != arg.name:
607
+ error()
608
+ if param.default is not inspect.Parameter.empty or arg.default is not None:
609
+ error_default_args()
610
+
611
+ compare(positional, schema.arguments.flat_positional)
612
+ compare(kwargonly, schema.arguments.flat_kwarg_only)
613
+
614
+
615
+ def report_error_callback(custom_op: typing.Any, key: str) -> None:
616
+ if key == "Undefined":
617
+ raise NotImplementedError(
618
+ f"{custom_op}: There were no Tensor inputs to this operator "
619
+ f"(e.g. you passed an empty list of Tensors). If your operator is a "
620
+ f"factory function (that is, it takes no Tensors and constructs "
621
+ f"a new one), then please use CustomOp.impl_factory to register "
622
+ f"an implementation for it"
623
+ )
624
+ if key == "Meta":
625
+ raise NotImplementedError(
626
+ f"{custom_op}: when running with device='Meta' tensors: there is no "
627
+ f"abstract impl registered for this CustomOp. Please register one via "
628
+ f"CustomOp.impl_abstract to get this CustomOp to work with Meta tensors"
629
+ )
630
+ if key in ("CPU", "CUDA"):
631
+ device = key.lower()
632
+ raise NotImplementedError(
633
+ f"{custom_op}: when running with device='{device}' tensors: there is no "
634
+ f"{device} impl registered for this CustomOp. Please register one via "
635
+ f"CustomOp.impl(device_type='{device}')"
636
+ )
637
+ raise NotImplementedError(
638
+ f"{custom_op}: No implementation for dispatch key {key}. It is likely "
639
+ f"that we have not added this functionality yet, please either open an "
640
+ f"issue or if you're feeling adventurous, use the low-level "
641
+ f"torch.library API"
642
+ )
643
+
644
+
645
+ def custom_op_from_existing(op):
646
+ ns = op.namespace
647
+ lib = torch.library.Library(ns, "FRAGMENT")
648
+ name = op.name().split("::")[-1]
649
+ schema_str = str(op._schema)
650
+ # CustomOp expects the schema string without the namespace
651
+ schema_str = schema_str.split("::")[-1]
652
+ schema = FunctionSchema.parse(schema_str)
653
+ return CustomOp(lib, ns, schema, name, op, _private_access=True)
654
+
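+ # Editor's note (hypothetical op name): custom_op_from_existing(
+ # torch.ops.mylib.foo.default) wraps an already-registered overload in a
+ # CustomOp backed by a FRAGMENT library for the same namespace.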
655
+
656
+ def get_op(qualname):
657
+ def error_not_found():
658
+ raise ValueError(
659
+ f"Could not find the operator {qualname}. Please make sure you have "
660
+ f"already registered the operator and (if registered from C++) "
661
+ f"loaded it via torch.ops.load_library."
662
+ )
663
+
664
+ ns, name = parse_qualname(qualname)
665
+ if not hasattr(torch.ops, ns):
666
+ error_not_found()
667
+ opnamespace = getattr(torch.ops, ns)
668
+ if not hasattr(opnamespace, name):
669
+ error_not_found()
670
+ packet = getattr(opnamespace, name)
671
+ if not hasattr(packet, "default"):
672
+ error_not_found()
673
+ return packet.default
674
+
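+ # For illustration (hypothetical name): get_op("mylib::foo") resolves to
+ # torch.ops.mylib.foo.default, and calls error_not_found() if any lookup
+ # step fails.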
675
+
676
+ def _find_custom_op(qualname, also_check_torch_library=False):
677
+ if qualname in global_registry:
678
+ return global_registry[qualname]
679
+ if not also_check_torch_library:
680
+ raise RuntimeError(
681
+ f'Could not find custom op "{qualname}". Did you register it via '
682
+ f"the torch._custom_ops API?"
683
+ )
684
+ overload = get_op(qualname)
685
+ result = custom_op_from_existing(overload)
686
+ return result
687
+
688
+
689
+ def get_abstract_impl(qualname):
690
+ if qualname not in torch._custom_op.impl.global_registry:
691
+ return None
692
+ custom_op = torch._custom_op.impl.global_registry[qualname]
693
+ if custom_op is None:
694
+ return None
695
+ if not custom_op._has_impl("abstract"):
696
+ return None
697
+ return custom_op._get_impl("abstract").func
698
+
699
+
700
+ def _custom_op_with_schema(qualname, schema, needs_fixed_stride_order=True):
701
+ ns, name = qualname.split("::")
702
+ schema_str = f"{name}{schema}"
703
+ function_schema = FunctionSchema.parse(schema_str)
704
+ validate_schema(function_schema)
705
+ tags = [torch._C.Tag.needs_fixed_stride_order] if needs_fixed_stride_order else []
706
+ lib = library.Library(ns, "FRAGMENT")
707
+ lib.define(schema_str, tags=tags)
708
+ ophandle = find_ophandle_or_throw(ns, function_schema.name)
709
+ result = CustomOp(lib, ns, function_schema, name, ophandle, _private_access=True)
710
+ result._register_autograd_kernel_indirection()
711
+
712
+ torch._C._dispatch_set_report_error_callback(
713
+ ophandle, functools.partial(report_error_callback, weakref.proxy(result))
714
+ )
715
+ return get_op(qualname)
phivenv/Lib/site-packages/torch/_decomp/__init__.py ADDED
@@ -0,0 +1,544 @@
1
+ # mypy: allow-untyped-defs
2
+ import inspect
3
+ from collections import defaultdict
4
+ from collections.abc import Sequence
5
+ from functools import lru_cache, partial, wraps
6
+ from itertools import chain
7
+ from typing import Callable, Optional, TYPE_CHECKING, TypeVar, Union
8
+ from typing_extensions import ParamSpec
9
+
10
+
11
+ if TYPE_CHECKING:
12
+ from torch.export.decomp_utils import CustomDecompTable
13
+
14
+ import torch
15
+ import torch.library
16
+ from torch._ops import HigherOrderOperator, OperatorBase, OpOverload, OpOverloadPacket
17
+ from torch._prims_common import CustomOutParamAnnotation
18
+ from torch._subclasses.functional_tensor import FunctionalTensor
19
+ from torch.utils import _pytree as pytree
20
+
21
+
22
+ __all__ = [
23
+ "decomposition_table",
24
+ "pre_autograd_decomposition_table",
25
+ "meta_table",
26
+ "register_decomposition",
27
+ "get_decompositions",
28
+ "core_aten_decompositions",
29
+ "_should_decompose_because_unsafe_op",
30
+ ]
31
+
32
+ _T = TypeVar("_T")
33
+ _P = ParamSpec("_P")
34
+
35
+ # TODO: relax key type here; torch registrations should be possible too, but
36
+ # right now this type is accurate
37
+ global_decomposition_table: dict[str, dict[torch._ops.OperatorBase, Callable]] = (
38
+ defaultdict(dict)
39
+ )
40
+
41
+ decomposition_table = global_decomposition_table["post_autograd"]
42
+ pre_autograd_decomposition_table = global_decomposition_table["pre_autograd"]
43
+ meta_table = global_decomposition_table["meta"]
44
+
45
+
46
+ def _should_decompose_because_unsafe_op(op: torch._ops.OperatorBase) -> bool:
47
+ """
48
+ Returns True if the op must always decompose in export/compile tracing system
49
+
50
+ In export, we always decompose certain CIA ops that are tagged with
51
+ maybe_aliasing_or_mutating because we statically need to know if the op is
52
+ mutating or not. But these CIA ops could have different behaviour in runtime.
53
+
54
+ native_batch_norm is a prim op which has a wrong schema and it needs to be replaced
55
+ with correct schema. But until then, we will force decompose it via this tag.
56
+ """
57
+ if not isinstance(op, torch._ops.OpOverload):
58
+ return False
59
+ if torch.Tag.maybe_aliasing_or_mutating in op.tags:
60
+ return True
61
+ return op == torch.ops.aten.native_batch_norm.default
62
+
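+ # For illustration: _should_decompose_because_unsafe_op(
+ # torch.ops.aten.native_batch_norm.default) returns True, while any op that
+ # is not an OpOverload (e.g. a HigherOrderOperator) returns False.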
63
+
64
+ def _add_op_to_registry(registry, op, fn):
65
+ """
66
+ This is an internal API for adding an op to the decomposition table.
67
+
68
+ If op is OpOverload, it will be added to the registry directly.
69
+ If op is OpOverloadPacket, all the valid op_overloads in the packet will be added to the registry.
70
+ """
71
+ overloads: list[torch._ops.OperatorBase] = []
72
+ if isinstance(op, HigherOrderOperator):
73
+ # There's no concept of overloads for HigherOrderOperator
74
+ registry[op] = fn
75
+ return
76
+ elif isinstance(op, OpOverload):
77
+ overloads.append(op)
78
+ else:
79
+ assert isinstance(op, OpOverloadPacket)
80
+ for ol in op.overloads():
81
+ overloads.append(getattr(op, ol))
82
+
83
+ for op_overload in overloads:
84
+ if op_overload in registry:
85
+ raise RuntimeError(f"duplicate registrations for {op_overload}")
86
+ # TorchScript dumps a bunch of extra nonsense overloads
87
+ # which don't have corresponding dispatcher entries, so we need
88
+ # to filter those out, e.g. aten.add.float_int
89
+ if torch._C._dispatch_has_kernel(op_overload.name()):
90
+ registry[op_overload] = fn
91
+
92
+
93
+ def _convert_out_params(f):
94
+ out_annotation = f.__annotations__.get("out")
95
+
96
+ # If there are no out params, do not wrap the function.
97
+ if not out_annotation:
98
+ return f
99
+
100
+ # Hack to detect when out is a Tuple. There seems to be no pretty way of doing this
101
+ if getattr(out_annotation, "__origin__", None) is tuple:
102
+ sig = inspect.signature(f)
103
+ out_names = sig.return_annotation._fields
104
+ # If out is a tuple, we need to register a function that unpacks all the out
105
+ # elements as this is what native_functions.yaml expects
106
+
107
+ @wraps(f)
108
+ def _fn(*args, **kwargs):
109
+ out_kwargs = tuple(kwargs.pop(o, None) for o in out_names)
110
+ # Either all of the out kwargs are set or none of them
111
+ is_none = out_kwargs[0] is None
112
+ assert all((o is None) == is_none for o in out_kwargs)
113
+ return f(*args, **kwargs, out=None if is_none else out_kwargs)
114
+
115
+ out_params = [
116
+ inspect.Parameter(
117
+ o,
118
+ kind=inspect.Parameter.KEYWORD_ONLY,
119
+ default=None,
120
+ annotation=t,
121
+ )
122
+ for o, t in zip(out_names, out_annotation.__args__)
123
+ ]
124
+ # Drop the out parameter and concatenate the new kwargs in the signature
125
+ params = chain((v for k, v in sig.parameters.items() if k != "out"), out_params)
126
+ _fn.__signature__ = inspect.Signature( # type: ignore[attr-defined]
127
+ parameters=params, # type: ignore[arg-type]
128
+ return_annotation=sig.return_annotation,
129
+ )
130
+ # Drop the out parameter and concatenate the new kwargs in the annotations
131
+ _fn.__annotations__ = {k: v for k, v in f.__annotations__.items() if k != "out"}
132
+ for o in out_params:
133
+ _fn.__annotations__[o.name] = o.annotation
134
+
135
+ # Propagate that this function is wrapped by `out_wrapper`
136
+ _fn._torch_decompositions_out_wrapper = f._torch_decompositions_out_wrapper # type: ignore[attr-defined]
137
+
138
+ return _fn
139
+
140
+ # Alternatively, there may be a single tensor out parameter with a name
141
+ # other than "out". This will need special treatment and is indicated by an
142
+ # annotation, which we will remove here so it is not exposed after wrapping.
143
+ custom_out_param_name = f.__annotations__.pop(CustomOutParamAnnotation, None)
144
+ if custom_out_param_name:
145
+
146
+ @wraps(f)
147
+ def _fn(*args, **kwargs):
148
+ out_kwarg = kwargs.pop(custom_out_param_name, None)
149
+ return f(*args, **kwargs, out=out_kwarg)
150
+
151
+ out_param = inspect.Parameter(
152
+ custom_out_param_name,
153
+ kind=inspect.Parameter.KEYWORD_ONLY,
154
+ default=None,
155
+ annotation=out_annotation,
156
+ )
157
+
158
+ # Drop the out parameter and concatenate the new kwarg in the signature
159
+ sig = inspect.signature(f)
160
+ params = chain(
161
+ (v for k, v in sig.parameters.items() if k != "out"), (out_param,)
162
+ )
163
+ _fn.__signature__ = inspect.Signature( # type: ignore[attr-defined]
164
+ parameters=params, # type: ignore[arg-type]
165
+ return_annotation=sig.return_annotation,
166
+ )
167
+
168
+ # Drop the out parameter and concatenate the new kwargs in the annotations
169
+ _fn.__annotations__ = {k: v for k, v in f.__annotations__.items() if k != "out"}
170
+ _fn.__annotations__[out_param.name] = out_param.annotation
171
+
172
+ return _fn
173
+
174
+ return f
175
+
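+ # Editor's illustration (hypothetical decomposition, not in this file): given
+ #
+ #     VarMean = NamedTuple("VarMean", [("var", Tensor), ("mean", Tensor)])
+ #
+ #     def var_mean_decomp(x, *, out: VarMean = None): ...
+ #
+ # _convert_out_params exposes `var=` and `mean=` as separate keyword-only
+ # parameters and repacks them into the `out` tuple before calling the
+ # original function, matching what native_functions.yaml expects for
+ # multi-output out= overloads.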
176
+
177
+ def register_decomposition(
178
+ aten_op, registry=None, *, type="post_autograd", unsafe=False
179
+ ) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
180
+ """
181
+ A decorator to register a function as a decomposition to the Python
182
+ decomposition table. Use it like this::
183
+
184
+ @register_decomposition(torch.ops.aten.clamp_min)
185
+ def clamp_min(x, min):
186
+ return torch.clamp(x, min=min)
187
+
188
+ If you are writing a new decomposition, consider contributing it
189
+ directly to PyTorch in torch._decomp.decompositions.
190
+
191
+ This API is experimental; we are almost certainly going to extend
192
+ the API when we make decompositions eligible for use in transforms (e.g.,
193
+ autograd) and not just backend tracing, where we then need to know if a
194
+ decomposition can be used to simulate a transform.
195
+
196
+ By default, we will also register it to the dispatcher's Meta key,
197
+ replacing the C++ Meta implementation if one already exists.
198
+
199
+ The unsafe kwarg exists so this function can be reused to register
200
+ non-function things.
201
+ """
202
+
203
+ assert type in {"post_autograd", "pre_autograd", "meta"}
204
+
205
+ def decomposition_decorator(fn: Callable[_P, _T]) -> Callable[_P, _T]:
206
+ orig_fn = fn
207
+ if not unsafe:
208
+ fn = _convert_out_params(fn)
209
+
210
+ nonlocal registry
211
+ if registry is None:
212
+ registry = global_decomposition_table[type]
213
+
214
+ def register(op):
215
+ _add_op_to_registry(registry, op, fn)
216
+
217
+ # To handle allowing multiple aten_ops at once
218
+ pytree.tree_map_(register, aten_op)
219
+ return orig_fn
220
+
221
+ return decomposition_decorator
222
+
223
+
224
+ def get_decompositions(
225
+ aten_ops: Sequence[Union[torch._ops.OperatorBase, OpOverloadPacket]],
226
+ type: str = "post_autograd",
227
+ ) -> dict[torch._ops.OperatorBase, Callable]:
228
+ """
229
+ Retrieve a dictionary of decompositions corresponding to the list of
230
+ operator overloads and overload packets passed as input. Overload
231
+ packets will include all decomposed overloads in the packet. If there is
232
+ no decomposition for a requested operator, it is silently ignored.
233
+
234
+ This API is experimental; we are almost certainly going to give an alternate,
235
+ more recommended formulation, where a user provides the set of operators
236
+ they know how to implement, and we provide decompositions for everything
237
+ not in this set.
238
+ """
239
+ assert type in {"post_autograd", "pre_autograd", "meta"}
240
+
241
+ registry = global_decomposition_table[type]
242
+ packets_to_overloads = defaultdict(list)
243
+ for opo in registry:
244
+ if isinstance(opo, (OpOverload, OpOverloadPacket)):
245
+ packets_to_overloads[opo.overloadpacket].append(opo)
246
+ decompositions: dict[torch._ops.OperatorBase, Callable] = {}
247
+ for op in aten_ops:
248
+ if isinstance(op, OpOverloadPacket) and op in packets_to_overloads:
249
+ for op_overload in packets_to_overloads[op]:
250
+ decompositions[op_overload] = registry[op_overload]
251
+ elif isinstance(op, (torch._ops.OperatorBase)) and op in registry:
252
+ decompositions[op] = registry[op]
253
+ return decompositions
254
+
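+ # Editor's sketch (the chosen ops are arbitrary examples): requesting
+ # decompositions by overload packet and by a specific overload:
+ #
+ #     aten = torch.ops.aten
+ #     decomps = get_decompositions([aten.addcmul, aten.logit.default])
+ #     # decomps maps each matched OpOverload to its Python decomposition.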
255
+
256
+ def remove_decompositions(
257
+ decompositions: dict[torch._ops.OperatorBase, Callable],
258
+ aten_ops: Sequence[Union[OpOverload, OpOverloadPacket]],
259
+ ) -> None:
260
+ """
261
+ Given a dictionary of decompositions obtained from get_decompositions(), removes
262
+ operators associated with a list of operator overloads and overload packets passed
263
+ as input. If the decomposition dictionary does not contain a decomposition that is
264
+ specified to be removed, it is silently ignored.
265
+ """
266
+ for op in aten_ops:
267
+ if isinstance(op, OpOverloadPacket):
268
+ for overload_name in op.overloads():
269
+ opo = getattr(op, overload_name)
270
+ decompositions.pop(opo, None)
271
+ elif isinstance(op, OpOverload):
272
+ decompositions.pop(op, None)
273
+
274
+
275
+ # populate the table
276
+ import torch._decomp.decompositions
277
+ import torch._refs
278
+
279
+
280
+ def core_aten_decompositions() -> "CustomDecompTable":
281
+ from torch.export.exported_program import default_decompositions
282
+
283
+ return default_decompositions()
284
+
285
+
286
+ # See NOTE [Core ATen Ops]
287
+ #
288
+ # list was copied from torch/_inductor/decomposition.py
289
+ # excluding decompositions that result in prim ops.
290
+ # The resulting opset of the decomposition is core ATen ops.
291
+ def _core_aten_decompositions_post_autograd() -> dict[
292
+ torch._ops.OperatorBase, Callable
293
+ ]:
294
+ aten = torch.ops.aten
295
+ return get_decompositions(
296
+ [
297
+ aten.addcdiv,
298
+ aten.addcdiv_,
299
+ aten.addcmul,
300
+ aten.addcmul_,
301
+ aten.addr,
302
+ aten.affine_grid_generator,
303
+ aten.alias_copy,
304
+ aten.all,
305
+ aten.aminmax,
306
+ aten.arange.default,
307
+ aten.arange.start,
308
+ aten.avg_pool2d_backward,
309
+ aten.baddbmm,
310
+ aten.binary_cross_entropy,
311
+ aten.binary_cross_entropy_backward,
312
+ aten.binary_cross_entropy_with_logits,
313
+ aten.block_diag,
314
+ aten.bernoulli.p,
315
+ aten.bernoulli.default,
316
+ aten.celu,
317
+ aten.celu_,
318
+ aten.channel_shuffle,
319
+ aten.clamp_max,
320
+ aten.clamp_min,
321
+ aten.col2im,
322
+ aten.count_nonzero,
323
+ aten.linalg_cross,
324
+ aten.cudnn_batch_norm,
325
+ aten.cudnn_batch_norm_backward,
326
+ aten.miopen_batch_norm_backward,
327
+ aten.deg2rad,
328
+ aten.deg2rad_,
329
+ aten.detach,
330
+ aten.diag_embed,
331
+ aten.diagonal_backward,
332
+ aten.diagonal_copy,
333
+ aten.dot,
334
+ aten.vdot,
335
+ aten.elu_,
336
+ aten.elu_backward,
337
+ aten._embedding_bag,
338
+ aten.embedding_dense_backward,
339
+ aten.empty_like,
340
+ aten._euclidean_dist.default,
341
+ aten.expand_as,
342
+ aten.expand_copy,
343
+ aten.eye,
344
+ aten.fill,
345
+ aten.fill_,
346
+ aten.floor_divide,
347
+ aten.frac,
348
+ aten.frac_,
349
+ aten._fused_moving_avg_obs_fq_helper,
350
+ aten.gelu_,
351
+ aten.gelu_backward,
352
+ aten.glu,
353
+ aten.glu_backward,
354
+ aten.hardshrink,
355
+ aten.hardsigmoid,
356
+ aten.hardsigmoid_,
357
+ aten.hardsigmoid_backward,
358
+ aten.hardswish,
359
+ aten.hardswish_,
360
+ aten.hardswish_backward,
361
+ aten.hardtanh_,
362
+ aten.hardtanh_backward,
363
+ aten.heaviside,
364
+ aten.heaviside_,
365
+ aten.huber_loss,
366
+ aten.huber_loss_backward,
367
+ aten.im2col,
368
+ aten.index_add.out,
369
+ aten.index_add.default,
370
+ aten.index_add_,
371
+ aten.index_copy.out,
372
+ aten.index_copy.default,
373
+ aten.index_copy_,
374
+ aten.index_fill.int_Scalar,
375
+ aten.index_fill.int_Tensor,
376
+ aten.index_fill.int_Scalar_out,
377
+ aten.index_fill.int_Tensor_out,
378
+ aten.index_fill_,
379
+ aten.isin,
380
+ aten.isneginf,
381
+ aten.isposinf,
382
+ aten.l1_loss,
383
+ aten._lazy_clone,
384
+ aten._test_parallel_materialize,
385
+ aten.leaky_relu_,
386
+ aten.leaky_relu_backward,
387
+ aten.lerp,
388
+ aten.lerp_,
389
+ aten.linspace,
390
+ aten.logaddexp,
391
+ aten.logaddexp2,
392
+ aten.logit,
393
+ aten.logit_,
394
+ aten.logit_backward,
395
+ aten.log_sigmoid_backward,
396
+ aten.log_sigmoid_forward,
397
+ aten._log_softmax_backward_data,
398
+ aten.logspace,
399
+ aten.logsumexp.default,
400
+ aten.masked_fill,
401
+ aten.masked_fill_,
402
+ aten.max_unpool2d,
403
+ aten.max_unpool3d,
404
+ aten.mish,
405
+ aten.mish_,
406
+ aten.mse_loss,
407
+ aten.mse_loss_backward,
408
+ aten.multi_margin_loss,
409
+ aten.multilabel_margin_loss_forward,
410
+ aten.mv,
411
+ aten.mvlgamma,
412
+ aten.mvlgamma_,
413
+ aten.nansum,
414
+ aten.nan_to_num,
415
+ aten.nan_to_num_,
416
+ aten.narrow,
417
+ aten.native_batch_norm_backward,
418
+ aten.native_dropout_backward,
419
+ aten.native_group_norm_backward,
420
+ aten.native_layer_norm_backward,
421
+ aten.new_empty,
422
+ aten.new_full,
423
+ aten.new_ones,
424
+ aten.new_zeros,
425
+ aten.nll_loss2d_forward,
426
+ aten.nll_loss2d_backward,
427
+ aten.nll_loss_backward,
428
+ aten.nll_loss_forward,
429
+ aten.norm.ScalarOpt_dtype,
430
+ aten.norm.Scalar,
431
+ aten.norm.ScalarOpt_dim_dtype,
432
+ aten.norm.ScalarOpt_dim,
433
+ aten.norm.dtype_out,
434
+ aten.norm.out,
435
+ aten.norm.names_dtype_out,
436
+ aten.norm.names_out,
437
+ aten.norm.ScalarOpt_dtype_out,
438
+ aten.norm.Scalar_out,
439
+ aten.ones,
440
+ aten.ones_like,
441
+ aten.pixel_shuffle,
442
+ aten.pixel_unshuffle,
443
+ aten._prelu_kernel,
444
+ aten._prelu_kernel_backward,
445
+ aten._reshape_alias,
446
+ aten.rad2deg,
447
+ aten.rad2deg_,
448
+ aten.reflection_pad1d,
449
+ aten.reflection_pad1d_backward,
450
+ aten.reflection_pad2d,
451
+ aten.reflection_pad2d_backward,
452
+ aten.reflection_pad3d,
453
+ aten.reflection_pad3d_backward,
454
+ aten.replication_pad1d,
455
+ aten.replication_pad2d,
456
+ aten.replication_pad3d,
457
+ aten.renorm,
458
+ aten.renorm_,
460
+ aten.resize_as,
461
+ aten.roll,
462
+ aten.rot90,
463
+ aten.rrelu_with_noise,
464
+ aten.rrelu_with_noise_,
465
+ aten.rsub,
466
+ aten._safe_softmax,
467
+ aten._scaled_dot_product_flash_attention_for_cpu.default,
468
+ aten.select_backward,
469
+ aten.select_scatter,
470
+ aten.sgn,
471
+ aten.sgn_,
472
+ aten.sigmoid_backward,
473
+ aten.silu,
474
+ aten.silu_,
475
+ aten.silu_backward.grad_input,
476
+ aten.sinc,
477
+ aten.sinc_,
478
+ aten.slice_backward,
479
+ aten.smooth_l1_loss,
480
+ aten.smooth_l1_loss_backward,
481
+ aten.soft_margin_loss,
482
+ aten.soft_margin_loss_backward,
483
+ aten._softmax_backward_data,
484
+ aten.softplus,
485
+ aten.softplus_backward,
486
+ aten.softshrink,
487
+ aten.special_entr,
488
+ aten.special_log_ndtr,
489
+ aten.special_xlog1py,
490
+ aten.split.Tensor,
491
+ aten.split_with_sizes_copy,
492
+ aten.squeeze_copy,
493
+ aten.squeeze.default,
494
+ aten.squeeze.dim,
495
+ aten.std.correction,
496
+ aten.std.out,
497
+ aten.std.correction_out,
498
+ aten.std.names_out,
499
+ aten.std.correction_names_out,
500
+ aten.std_mean.correction,
501
+ aten.std_mean.correction_out,
502
+ aten.stack,
503
+ aten.sum.default,
504
+ aten.sum.out,
505
+ aten.t,
506
+ aten.t_copy,
507
+ aten.take,
508
+ aten.tanh_backward,
509
+ aten.threshold,
510
+ aten.threshold_,
511
+ aten.threshold_backward,
512
+ aten.trace,
513
+ aten.transpose.int,
514
+ aten.transpose_copy,
515
+ aten.tril,
516
+ aten.tril_,
517
+ aten.triu,
518
+ aten.triu_,
519
+ aten.unbind,
520
+ aten.unfold_backward,
521
+ aten.unfold_copy,
522
+ aten._unsafe_index,
523
+ aten._unsafe_index_put,
524
+ aten._unsafe_masked_index,
525
+ aten._unsafe_masked_index_put_accumulate,
526
+ aten.unsafe_split.Tensor,
527
+ aten.unsafe_split_with_sizes,
528
+ aten.unsqueeze_copy,
529
+ aten._unsafe_view,
530
+ aten.upsample_linear1d,
531
+ aten.upsample_bilinear2d.out,
532
+ aten.upsample_trilinear3d.out,
533
+ aten.upsample_nearest2d_backward,
534
+ aten.view_as_complex,
535
+ aten.xlogy,
536
+ aten.xlogy_,
537
+ aten.zero,
538
+ aten.zero_,
539
+ aten.zeros,
540
+ aten.zeros_like,
541
+ aten._chunk_cat,
542
+ aten._weight_norm_interface,
543
+ ]
544
+ )
phivenv/Lib/site-packages/torch/_decomp/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (14 kB).
 
phivenv/Lib/site-packages/torch/_decomp/__pycache__/decompositions_for_jvp.cpython-39.pyc ADDED
Binary file (6.7 kB).
 
phivenv/Lib/site-packages/torch/_decomp/__pycache__/decompositions_for_rng.cpython-39.pyc ADDED
Binary file (8.1 kB).
 
phivenv/Lib/site-packages/torch/_decomp/decompositions.py ADDED
The diff for this file is too large to render.
 
phivenv/Lib/site-packages/torch/_decomp/decompositions_for_jvp.py ADDED
@@ -0,0 +1,335 @@
1
+ # mypy: allow-untyped-decorators
2
+ # mypy: allow-untyped-defs
3
+ import inspect
4
+ from typing import Callable, Optional
5
+
6
+ import torch
7
+ import torch._decomp
8
+ from torch import Tensor
9
+ from torch._prims_common.wrappers import _maybe_remove_out_wrapper
10
+
11
+
12
+ decomposition_table = torch._decomp.decomposition_table
13
+ decomposition_table_for_jvp: dict[torch._ops.OperatorBase, Callable] = {}
14
+ register_decomposition = torch._decomp.register_decomposition
15
+ aten = torch.ops.aten
16
+
17
+ # NOTE: [forward-mode AD decompositions mechanism]
18
+ #
19
+ # The mechanism is in VariableType,
20
+ # IF any inputs have forward grad
21
+ # AND there is no forward AD formula implemented
22
+ # AND the functions are actually differentiable
23
+ # run the decomposition
24
+ # See run_jit_decomposition_with_args_for_jvp
25
+ # We currently use python decompositions that we torchscript.
26
+ #
27
+ # Note that we would be building the backward graph at the decomposed level
28
+ # too, but that is OK, because we would've errored out otherwise anyway.
29
+ #
30
+ # TODO: The mechanism we are using to register decompositions doesn't
31
+ # seem to be used exclusively for jvp. So an open question here is whether
32
+ # torch/csrc/jit/runtime/decomposition_registry.cpp is being used for other things.
33
+ # If that is the case, we may go down the decomposition path unexpectedly
34
+ # (and possibly produce an unintelligible error) vs erroring out earlier and
35
+ # printing that the forward AD formula is not implemented.
36
+ #
37
+ # The solution to this may be to have an explicit allowlist controlling when
38
+ # to enable the decomposition.
39
+
40
+
41
+ def maybe_register_decomposition(op):
42
+ def decorator(f):
43
+ try:
44
+ return register_decomposition(op)(f)
45
+ except Exception:
46
+ return f
47
+
48
+ return decorator
49
+
50
+
51
+ # Functions where we need a special decomposition for jvp but there's another version that
52
+ # should be used more generally (e.g., for jvp we need to recompute the mean and variance for
53
+ # the backward of a normalization function; without jvp, it should use the saved values)
54
+ decomposition_table_for_jvp = {}
55
+
56
+
57
+ def register_decomposition_for_jvp(fn):
58
+ return register_decomposition(fn, registry=decomposition_table_for_jvp)
59
+
60
+
61
+ def _register_jit_decomposition_for_jvp(decomp, use_python=False):
62
+ if decomp in decomposition_table_for_jvp:
63
+ decomposition_table_used = decomposition_table_for_jvp
64
+ elif decomp in decomposition_table:
65
+ decomposition_table_used = decomposition_table
66
+ else:
67
+ raise RuntimeError(f"could not find decomposition for {decomp}")
68
+ decomp_fn = decomposition_table_used[decomp]
69
+
70
+ # `out_wrapper` extends a decomposition's signature with
72
+ # an `out` parameter. However, jit will use the unwrapped function's
73
+ # signature instead, so we need to unwrap here to prevent an error
73
+ decomp_fn = _maybe_remove_out_wrapper(decomp_fn)
74
+
75
+ if use_python:
76
+ decomp_fn = torch.jit.ignore(decomp_fn)
77
+ sig = inspect.signature(decomp_fn)
78
+
79
+ # Create a string wrapping the function from the signature
80
+ # example output:
81
+ # def wrapped_decomp(x: torch.Tensor, y: int, z: int):
82
+ # return decomp_fn(x, y, z)
83
+ # Thanks copilot!
84
+ def get_function_def(sig):
85
+ param_def = [f"{param_str}" for param_str in sig.parameters.values()]
86
+ param_use = [f"{param_str}" for param_str in sig.parameters.keys()]
87
+
88
+ return f"def wrapped_decomp({', '.join(param_def)}):\n return decomp_fn({', '.join(param_use)})\n"
89
+
90
+ f_str = get_function_def(sig)
91
+ graph = torch.jit.CompilationUnit(f_str).wrapped_decomp.graph
92
+ else:
93
+ graph = torch.jit.script(decomp_fn).graph
94
+ torch.jit._register_decomposition(decomp, graph)
95
+
96
+
97
+ # The only decompositions here are temporary or hacks for the purposes of jvp
98
+
99
+
100
+ # TODO: do these also belong here?
101
+ @maybe_register_decomposition(aten.trace.default)
102
+ def trace(self: Tensor) -> Tensor:
103
+ return torch.sum(torch.diag(self))
104
+
105
+
106
+ @maybe_register_decomposition(aten.log_sigmoid_forward.default)
107
+ def log_sigmoid_forward(self: Tensor) -> tuple[Tensor, Tensor]:
108
+ min = torch.minimum(self.new_zeros(()), self)
109
+ z = torch.exp(-torch.abs(self))
110
+ if self.is_cuda or self.is_xpu:
111
+ buffer = self.new_zeros((0,))
112
+ else:
113
+ buffer = z
114
+ return min - torch.log1p(z), buffer
115
+
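+ # Editor's note: the expression above is the numerically stable identity
+ # log(sigmoid(x)) = min(0, x) - log(1 + exp(-|x|)), which avoids overflowing
+ # exp() for large |x|.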
116
+
117
+ def recompute_mean_var(
118
+ input: Tensor, rstd: Tensor, inner_dim_indices: list[int], keepdim: bool
119
+ ):
120
+ # for most norm decompositions, it will be the same as the core version except for here.
121
+ # We recompute the mean and variance so that they track gradients through input
122
+
123
+ mean = torch.mean(input, dim=inner_dim_indices, keepdim=keepdim)
124
+ var = torch.var(input, dim=inner_dim_indices, unbiased=False, keepdim=keepdim)
125
+ eps = torch.pow(1 / rstd, 2) - var # this makes me so sad inside
126
+ eps = eps.detach()
127
+ rstd = 1 / torch.sqrt(var + eps)
128
+ return mean, rstd
129
+
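+ # Editor's note on the eps recovery above: the saved rstd satisfies
+ # rstd = 1 / sqrt(var + eps), so eps = (1 / rstd) ** 2 - var. Recomputing
+ # rstd from the freshly computed var (which tracks gradients through input)
+ # with that detached eps reproduces the forward value while restoring the
+ # gradient path.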
130
+
131
+ @register_decomposition_for_jvp(aten.native_layer_norm_backward)
132
+ def native_layer_norm_backward(
133
+ grad_out: Tensor,
134
+ input: Tensor,
135
+ normalized_shape: list[int],
136
+ mean: Tensor,
137
+ rstd: Tensor,
138
+ weight: Optional[Tensor],
139
+ bias: Optional[Tensor],
140
+ output_mask: list[bool],
141
+ ) -> tuple[Optional[Tensor], Optional[Tensor], Optional[Tensor]]:
142
+ input_shape = input.shape
143
+ input_ndim = input.dim()
144
+
145
+ axis = input_ndim - len(normalized_shape)
146
+ inner_dims = input_shape[axis:]
147
+ outer_dims = input_shape[:axis]
148
+ inner_dim_indices = list(range(axis, input_ndim))
149
+ outer_dim_indices = list(range(0, axis))
150
+
151
+ N = 1
152
+ for i in inner_dims:
153
+ N *= i
154
+ M = 1
155
+ for i in outer_dims:
156
+ M *= i
157
+ if M <= 0 or N <= 0:
158
+ return (
159
+ input.new_zeros(input_shape),
160
+ input.new_zeros(input_shape[axis:]),
161
+ input.new_zeros(input_shape[axis:]),
162
+ )
163
+
164
+ mean_, rstd_ = recompute_mean_var(input, rstd, inner_dim_indices, keepdim=True)
165
+
166
+ x_hat = (input - mean_) * rstd_
167
+ if weight is not None:
168
+ grad_x_hat = grad_out * weight
169
+ else:
170
+ grad_x_hat = grad_out
171
+ a = grad_x_hat * N
172
+ b = torch.sum(grad_x_hat, inner_dim_indices, True)
173
+ c1 = torch.mul(grad_x_hat, x_hat)
174
+ c2 = torch.sum(c1, inner_dim_indices, True)
175
+ c3 = torch.mul(x_hat, c2)
176
+ inner = a - b - c3
177
+
178
+ if output_mask[0]:
179
+ d_input: Optional[Tensor] = (rstd_ / N) * inner
180
+ else:
181
+ d_input = torch.zeros_like(input) # should be None but doesn't work with vjp
182
+
183
+ if output_mask[1] and weight is not None:
184
+ if len(outer_dim_indices) > 0:
185
+ d_weight: Optional[Tensor] = torch.sum(
186
+ grad_out * x_hat, outer_dim_indices, False
187
+ )
188
+ else:
189
+ d_weight = grad_out * x_hat
190
+ elif weight is not None:
191
+ d_weight = torch.zeros_like(weight) # should be None but doesn't work with vjp
192
+ else:
193
+ d_weight = torch.zeros(()) # should be None but doesn't work with vjp
194
+
195
+ if output_mask[2] and bias is not None:
196
+ if len(outer_dim_indices) > 0:
197
+ d_bias: Optional[Tensor] = torch.sum(grad_out, outer_dim_indices, False)
198
+ else:
199
+ d_bias = grad_out.clone()
200
+ elif bias is not None:
201
+ d_bias = torch.zeros_like(bias) # should be None but doesn't work with vjp
202
+ else:
203
+ d_bias = torch.zeros(()) # should be None but doesn't work with vjp
204
+
205
+ return (d_input, d_weight, d_bias)
206
+
207
+
208
+ def prod(x: list[int]):
209
+ r = 1
210
+ for i in x:
211
+ r *= i
212
+ return r
213
+
214
+
215
+ @register_decomposition_for_jvp(aten.native_batch_norm_backward)
216
+ def native_batch_norm_backward(
217
+ grad_out: Tensor,
218
+ input: Tensor,
219
+ weight: Optional[Tensor],
220
+ running_mean: Optional[Tensor],
221
+ running_var: Optional[Tensor],
222
+ save_mean: Optional[Tensor],
223
+ save_invstd: Optional[Tensor],
224
+ train: bool,
225
+ eps: float,
226
+ output_mask: list[bool],
227
+ ) -> tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
228
+ input_shape = input.shape
229
+ input_rank = input.dim()
230
+ assert input_rank >= 2, "rank of the input must be at least 2"
231
+
232
+ axis = 1
233
+ num_features = prod(input_shape) / input_shape[axis] # type: ignore[arg-type]
234
+ mean = save_mean
235
+ invstd = save_invstd
236
+ if train:
237
+ assert save_mean is not None and save_invstd is not None, (
238
+ "when train=True, save_mean and save_invstd are required"
239
+ )
240
+
241
+ reduction_dims = [0] + list(range(2, input.dim()))
242
+ assert invstd is not None # for typing
243
+ mean, invstd = recompute_mean_var(input, invstd, reduction_dims, keepdim=False)
244
+ else:
245
+ assert running_mean is not None and running_var is not None
246
+ mean = running_mean
247
+ invstd = torch.rsqrt(running_var + eps)
248
+
249
+ assert invstd is not None and mean is not None
250
+
251
+ broadcast_mask = [1] * input_rank
252
+ broadcast_mask[axis] = input_shape[axis]
253
+
254
+ reduction_axes: list[int] = []
255
+ for i in range(input_rank):
256
+ if i != axis:
257
+ reduction_axes.append(i)
258
+
259
+ mean = torch.reshape(mean, broadcast_mask)
260
+ norm = 1.0 / num_features
261
+ grad_output_sum = torch.sum(grad_out, reduction_axes)
262
+ dot_p = torch.sum(grad_out * (input - mean), reduction_axes)
263
+
264
+ grad_mean = torch.reshape(grad_output_sum * norm, broadcast_mask)
265
+ proj_scale = torch.reshape(torch.mul(dot_p * norm, invstd * invstd), broadcast_mask)
266
+
267
+ if weight is None:
268
+ grad_scale = torch.reshape(invstd, broadcast_mask) * 1.0
269
+ else:
270
+ grad_scale = torch.reshape(invstd * weight, broadcast_mask)
271
+
272
+ if train:
273
+ proj = (input - mean) * proj_scale
274
+ grad_input = ((grad_out - proj) - grad_mean) * grad_scale
275
+ else:
276
+ grad_input = grad_out * grad_scale
277
+
278
+ if output_mask[1]:
279
+ grad_weight = dot_p * invstd
280
+ elif weight is not None:
281
+ grad_weight = torch.zeros_like(
282
+ weight
283
+ ) # should be None but doesn't work with vjp
284
+ else:
285
+ grad_weight = torch.zeros(()) # should be None but doesn't work with vjp
286
+
287
+ if output_mask[2]:
288
+ grad_bias = grad_output_sum
289
+ else:
290
+ grad_bias = torch.zeros_like(
291
+ grad_output_sum
292
+ ) # should be None but doesn't work with vjp
293
+
294
+ return (grad_input, grad_weight, grad_bias)
295
+
296
+
297
+ @register_decomposition_for_jvp(aten.batch_norm_backward)
298
+ def batch_norm_backward(
299
+ grad_out: Tensor,
300
+ input: Tensor,
301
+ weight: Tensor,
302
+ running_mean: Optional[Tensor],
303
+ running_var: Optional[Tensor],
304
+ save_mean: Optional[Tensor],
305
+ save_var: Optional[Tensor],
306
+ update: bool,
307
+ eps: float,
308
+ output_mask: list[bool],
309
+ reserve: Tensor,
310
+ ) -> tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
311
+ return native_batch_norm_backward(
312
+ grad_out,
313
+ input,
314
+ weight,
315
+ running_mean,
316
+ running_var,
317
+ save_mean,
318
+ save_var,
319
+ update,
320
+ eps,
321
+ output_mask,
322
+ )
323
+
324
+
325
+ _register_jit_decomposition_for_jvp(torch.ops.aten.trace.default, use_python=True)
326
+ _register_jit_decomposition_for_jvp(torch.ops.aten.nll_loss_backward.default)
327
+ _register_jit_decomposition_for_jvp(torch.ops.aten.nll_loss2d_backward.default)
328
+ _register_jit_decomposition_for_jvp(torch.ops.aten._log_softmax_backward_data.default)
329
+ _register_jit_decomposition_for_jvp(torch.ops.aten._softmax_backward_data.default)
330
+ _register_jit_decomposition_for_jvp(torch.ops.aten.log_sigmoid_forward.default)
331
+ _register_jit_decomposition_for_jvp(torch.ops.aten.native_layer_norm_backward.default)
332
+ _register_jit_decomposition_for_jvp(torch.ops.aten.native_batch_norm_backward.default)
333
+ _register_jit_decomposition_for_jvp(torch.ops.aten.cudnn_batch_norm_backward.default)
334
+ _register_jit_decomposition_for_jvp(torch.ops.aten.batch_norm_backward.default)
335
+ _register_jit_decomposition_for_jvp(torch.ops.aten.miopen_batch_norm_backward.default)
phivenv/Lib/site-packages/torch/_decomp/decompositions_for_rng.py ADDED
@@ -0,0 +1,266 @@
1
+ # mypy: allow-untyped-decorators
2
+ # mypy: allow-untyped-defs
3
+ import functools
4
+ from collections import defaultdict
5
+ from typing import Callable
6
+
7
+ import torch
8
+ import torch._decomp as decomp
9
+ from torch._decomp import get_decompositions
10
+ from torch._ops import OpOverload
11
+
12
+
13
+ aten = torch.ops.aten
14
+
15
+ rng_decompositions: dict[str, dict[OpOverload, Callable]] = defaultdict(dict)
16
+
17
+
18
+ def register_rng_decomposition(aten_op):
19
+ return decomp.register_decomposition(aten_op, rng_decompositions)
20
+
21
+
22
+ def throw_on_non_cuda(device):
23
+ raise RuntimeError(
24
+ f"You are trying to functionalize a {device.type} RNG operator but {device.type} does not "
25
+ f"use Philox/counter-based RNG. Therefore, functionalizing a {device.type} RNG operator is "
26
+ "not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU."
27
+ )
28
+
29
+
30
+ # TODO - We have to register many more distributions here, and also higher level
31
+ # ops like dropout, which have fused implementations and can hide the rand inside.
32
+ @register_rng_decomposition(aten.rand)
33
+ def rand(shape, dtype=None, layout=torch.strided, device=None, pin_memory=False):
34
+ if device and device.type != "cuda":
35
+ throw_on_non_cuda(device)
36
+ seed, offset = PhiloxStateTracker.get_state_as_tuple()
37
+ dtype = dtype or torch.float32
38
+ out, offset_jump = torch.ops.rngprims.philox_rand(
39
+ shape, seed, offset, None, device, dtype
40
+ )
41
+ PhiloxStateTracker.advance_offset(offset_jump)
42
+ return out
43
+
44
+
45
+ @register_rng_decomposition(aten.rand_like)
46
+ def rand_like(
47
+ x: torch.Tensor,
48
+ dtype=None,
49
+ layout=None,
50
+ device=None,
51
+ pin_memory=False,
52
+ memory_format=torch.preserve_format,
53
+ ):
54
+ device = device or x.device
55
+ if device.type != "cuda":
56
+ throw_on_non_cuda(device)
57
+ dtype = dtype or x.dtype
58
+ seed, offset = PhiloxStateTracker.get_state_as_tuple()
59
+ out, offset_jump = torch.ops.rngprims.philox_rand(
60
+ x.shape, seed, offset, None, device, dtype
61
+ )
62
+ PhiloxStateTracker.advance_offset(offset_jump)
63
+ return out
64
+
65
+
66
+ class PhiloxState:
67
+ """
68
+ Represents a PhiloxRngState - (seed, offset) where offset = base_offset +
69
+ relative_offset. seed and base_offset basically point to the rng state just
70
+ before tracing starts. relative_offset tracks the total offset consumed at
71
+ trace time.
72
+ """
73
+
74
+ def __init__(self) -> None:
75
+ self.reset()
76
+
77
+ def reset(self):
78
+ self.seed = torch.tensor(())
79
+ self.base_offset = torch.tensor(())
80
+ self.relative_offset = 0
81
+ self.offset_advanced_alteast_once = False
82
+
83
+ def validate_state(self):
84
+ assert self.seed.numel() != 0 and self.base_offset.numel() != 0
85
+
86
+ def advance_offset(self, consumed_offset):
87
+ self.offset_advanced_alteast_once = True
88
+ self.relative_offset = self.relative_offset + consumed_offset
89
+
90
+ def set_state(self, seed, base_offset, relative_offset=0):
91
+ self.seed = seed
92
+ self.base_offset = base_offset
93
+ self.relative_offset = relative_offset
94
+
95
+ def get_state_as_tuple(self):
96
+ self.validate_state()
97
+ return (self.seed, self.base_offset + self.relative_offset)
98
+
99
+ def get_state_as_tensor(self):
100
+ # Only needed because we override get_rng_state.
101
+ self.validate_state()
102
+ return torch.stack([self.seed, self.base_offset + self.relative_offset])
103
+
104
+ def set_state_from_tensor(self, state):
105
+ # Only needed because we override set_rng_state.
106
+ self.seed, self.base_offset = torch.unbind(state)
107
+ self.relative_offset = 0
108
+
109
+
110
+ class PhiloxStateTracker:
111
+ """
112
+ Singleton class to track the philox rng state during AOT Autograd tracing.
113
+ For each aot tracing instance, AOT Autograd resets this tracker and keeps
114
+ track of both forward and backward offsets. At runtime, we only care about
115
+ the total consumed forward and backward offsets. For dynamic shapes, these
116
+ offsets are a function of input shapes. Therefore, the AOT generated graphs
117
+ have additional outputs that compute total consumed forward and backward
118
+ offsets.
119
+ """
120
+
121
+ running_state: PhiloxState
122
+ fwd_state: PhiloxState
123
+ bwd_state: PhiloxState
124
+
125
+ def __enter__(self):
126
+ PhiloxStateTracker.reset()
127
+ return self
128
+
129
+ def __exit__(self, exc_type, exc_val, exc_tb):
130
+ PhiloxStateTracker.reset()
131
+
132
+ @classmethod
133
+ def reset(cls):
134
+ cls.running_state = PhiloxState()
135
+ cls.fwd_state = PhiloxState()
136
+ cls.bwd_state = PhiloxState()
137
+
138
+ @classmethod
139
+ def mark_beginning_of_forward(cls):
140
+ # Tells the tracker to use fwd_state as the running state
141
+ cls.running_state = cls.fwd_state
142
+
143
+ @classmethod
144
+ def mark_beginning_of_backward(cls):
145
+ # Tells the tracker to use bwd_state as the running state
146
+ cls.running_state = cls.bwd_state
147
+
148
+ @classmethod
149
+ def record_state(cls, seed, offset, mode):
150
+ # Records the seed and offset tensors. These tensors are used to invoke
151
+ # the philox_rand functional primitives.
152
+ if mode == "forward":
153
+ cls.fwd_state.set_state(seed, offset)
154
+ cls.mark_beginning_of_forward()
155
+ else:
156
+ assert mode == "backward"
157
+ cls.bwd_state.set_state(seed, offset)
158
+
159
+ @classmethod
160
+ def get_state_as_tensor(cls):
161
+ # The only reason this exists is because we override get_rng_state and
162
+ # set_rng_state during tracing. get_rng_state expects a tensor output,
163
+ # so return (seed, offset) tuple upset other parts of the program like
164
+ # ctx.saved_tensors.
165
+
166
+ # A bad consequence is that if user saves and restores rng state, we
167
+ # have little bit of ugliness in the generated code, where we first
168
+ # concat the (seed, offset) to create a tensor for get_rng_state, and
169
+ # then split it back to get (seed, offset) tuple in set_rng_state.
170
+
171
+ # TODO: Investigate if there is be a better way to wrap the tuple in a
172
+ # false Tensor object, and then desugar it later on.
173
+ return cls.running_state.get_state_as_tensor()
174
+
175
+ @classmethod
176
+ def get_state_as_tuple(cls):
177
+ return cls.running_state.get_state_as_tuple()
178
+
179
+ @classmethod
180
+ def set_state_from_tensor(cls, x):
181
+ # This is only needed because we override set_rng_state. Look at the
182
+ # comment in get_state_from_tensor method.
183
+ cls.running_state.set_state_from_tensor(x)
184
+
185
+ @classmethod
186
+ def advance_offset(cls, consumed_offset):
187
+ cls.running_state.advance_offset(consumed_offset)
188
+
189
+ @classmethod
190
+ def get_current_relative_offset(cls):
191
+ return cls.running_state.relative_offset
192
+
193
+ @staticmethod
194
+ def multiple_of_4(offset):
195
+ # torch cuda rng state offset must be a multiple of 4. For inductor, as
196
+ # we sum up all the numel, the result might not be a multiple of 4. This
197
+ # method achieves that.
198
+ return (offset + 3) // 4 * 4
199
+
200
+ @classmethod
201
+ def get_updated_fwd_offset(cls):
202
+ # Short circuit if no rand ops were observed
203
+ if not cls.fwd_state.offset_advanced_alteast_once:
204
+ return cls.fwd_state.base_offset
205
+ return cls.multiple_of_4(
206
+ cls.fwd_state.base_offset + cls.fwd_state.relative_offset
207
+ )
208
+
209
+ @classmethod
210
+ def get_updated_bwd_offset(cls):
211
+ # Short circuit if no rand ops were observed
212
+ if not cls.bwd_state.offset_advanced_alteast_once:
213
+ return cls.bwd_state.base_offset
214
+ return cls.multiple_of_4(
215
+ cls.bwd_state.base_offset + cls.bwd_state.relative_offset
216
+ )
217
+
218
+
219
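To make the flow concrete, here is a minimal sketch of how a tracing pass might drive the tracker; the seed/offset tensors and the consumed count of 16 are hypothetical placeholders, and it assumes PhiloxState.set_state records the base offset as defined earlier in this file:

import torch  # plus the PhiloxStateTracker / PhiloxState definitions above

seed, offset = torch.tensor(123), torch.tensor(0)  # stand-ins for graph inputs
with PhiloxStateTracker():
    PhiloxStateTracker.record_state(seed, offset, "forward")  # select fwd_state
    PhiloxStateTracker.advance_offset(16)  # a rand op consumed 16 Philox offsets
    fwd_total = PhiloxStateTracker.get_updated_fwd_offset()  # base + 16, rounded up to a multiple of 4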
+ # Adding more decompositions which eventually use rand_like inside decomps.
+ # Adding these in rng_decompositions ensures the functionalization of the
+ # rand_like ops used in these decomps. The list is copied from the inductor
+ # codebase, which uses it for a similar purpose.
+ #
+ # Caution - These decomps do not have the same accuracy as eager. However,
+ # we can't just disable them with a config flag like fallback_random, because
+ # for functionalization of rng ops, we have to decompose these ops.
+ extra_random_decomps = get_decompositions(
+     [
+         aten.cauchy,
+         aten.cauchy_,
+         aten.exponential,
+         aten.exponential_,
+         aten.geometric,
+         aten.geometric_,
+         aten.native_dropout,
+         aten.normal,
+         aten.normal_,
+         aten.normal_functional,
+         aten.log_normal,
+         aten.log_normal_,
+         aten.rrelu_with_noise,
+         aten.rrelu_with_noise_,
+         aten.uniform_,
+     ]
+ )
+ register_extra_random_decomp = functools.partial(
+     decomp.register_decomposition, registry=extra_random_decomps
+ )
+
+
+ @register_extra_random_decomp([aten.bernoulli_])
+ def bernoulli_(self, p=0.5):
+     if self.device == torch.device("cpu"):
+         return NotImplemented
+     return self.copy_(torch.rand_like(self, dtype=torch.float32) < p)
+
+
+ @register_extra_random_decomp([aten.bernoulli.p])
+ def bernoulli_p(self, p=0.5, *, generator=None):
+     if self.device == torch.device("cpu"):
+         return NotImplemented
+     assert generator is None
+     return torch.rand_like(self, dtype=torch.float32) < p
+
+
+ rng_decompositions.update(extra_random_decomps)  # type: ignore[arg-type]
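The two bernoulli decomps rely on the identity that a uniform(0, 1) sample compared against p is a Bernoulli(p) draw; a standalone eager-mode sketch of the same pattern:

import torch

x = torch.empty(10_000)
p = 0.3
samples = (torch.rand_like(x, dtype=torch.float32) < p).float()
print(samples.mean())  # ~0.3 in expectation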
phivenv/Lib/site-packages/torch/_dispatch/__init__.py ADDED
File without changes
phivenv/Lib/site-packages/torch/_dispatch/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (159 Bytes)
phivenv/Lib/site-packages/torch/_dispatch/__pycache__/python.cpython-39.pyc ADDED
Binary file (6.92 kB)
phivenv/Lib/site-packages/torch/_dispatch/python.py ADDED
@@ -0,0 +1,192 @@
+ # mypy: allow-untyped-defs
+ import itertools
+ import unittest.mock
+ from collections.abc import Iterator
+ from contextlib import contextmanager
+ from typing import Callable, TypeVar, Union
+ from typing_extensions import ParamSpec
+
+ import torch
+ import torch._C
+ import torch._ops
+ import torch.utils._python_dispatch
+ import torch.utils._pytree as pytree
+ from torch._C import DispatchKey
+
+
+ __all__ = ["enable_python_dispatcher", "no_python_dispatcher", "enable_pre_dispatch"]
+
+ no_python_dispatcher = torch._C._DisablePythonDispatcher
+ enable_python_dispatcher = torch._C._EnablePythonDispatcher
+ enable_pre_dispatch = torch._C._EnablePreDispatch
+
+ CROSSREF_FUNCTIONALIZE = False
+
+ _P = ParamSpec("_P")
+ _T = TypeVar("_T")
+
+
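The three exported names are aliases for C++ guard objects; assuming, as elsewhere in PyTorch, that they follow the context-manager protocol, typical use looks like this sketch:

import torch
from torch._dispatch.python import enable_python_dispatcher

# While the guard is active, dispatch consults the Python dispatcher, so
# Python-level registrations (e.g. decompositions) can intercept ops.
with enable_python_dispatcher():
    out = torch.add(torch.ones(2), torch.ones(2))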
+ def all_py_loaded_overloads() -> Iterator[torch._ops.OpOverload]:
+     """
+     Warning: the set of overloads this will report is very subtle. It is precisely
+     the set of torch.ops functions that have actually been accessed from Python
+     (e.g., we actually called torch.ops.aten.blah at some point). This is DIFFERENT
+     from the set of registered operators, which will in general be a larger set,
+     as it would include all operators for which we ran C++ static initializers or
+     Python operator registration. This does not eagerly populate the list on
+     torch.ops.aten; this list is lazy!
+
+     In other words, this is good for traversing over everything that has an
+     OpOverload object allocated in Python. We use it for cache invalidation, but
+     we don't rely on this list being complete.
+
+     Note that even if we did report all C++ registered overloads, this isn't
+     guaranteed to be complete either, as a subsequent lazy load of a library
+     which triggers more registrations could add more things to the set.
+     """
+     for ns in torch.ops:
+         packets = getattr(torch.ops, ns)
+         for op_name in packets:
+             packet = getattr(packets, op_name)
+             for overload in packet:
+                 yield getattr(packet, overload)
+
+
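Because the generator only walks OpOverloads that Python has already materialized, touching an op first makes the effect visible; a small sketch:

import torch
from torch._dispatch.python import all_py_loaded_overloads

_ = torch.ops.aten.add.Tensor  # materialize one OpOverload in Python
print(sum(1 for _ in all_py_loaded_overloads()))  # counts only touched overloads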
+ @contextmanager
+ def suspend_functionalization():
+     # Temporarily disable functionalization (if it is on), restoring both the
+     # dispatch key state and the reapply_views setting on exit.
+     f_tls = torch._C._dispatch_tls_is_dispatch_key_included(
+         torch._C.DispatchKey.Functionalize
+     )
+     f_rv = torch._C._functionalization_reapply_views_tls()
+     if f_tls:
+         torch._disable_functionalization()
+     try:
+         yield
+     finally:
+         if f_tls:
+             torch._enable_functionalization(reapply_views=f_rv)
+
+
+ def check_tensor_metadata_matches(nv, rv, desc):
+     assert callable(desc)
+     assert nv.size() == rv.size(), f"{desc()}: sizes {nv.size()} != {rv.size()}"
+     assert nv.dtype == rv.dtype, f"{desc()}: dtype {nv.dtype} != {rv.dtype}"
+     same_strides, idx = torch._prims_common.check_significant_strides(
+         nv, rv, only_cuda=False
+     )
+     assert same_strides, (
+         f"{desc()}: strides {nv.stride()} != {rv.stride()} (mismatch at index {idx})"
+     )
+
+
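The helper passes silently when sizes, dtypes, and significant strides agree, and raises with the desc() label otherwise; a runnable sketch (the "demo" label is arbitrary):

import torch
from torch._dispatch.python import check_tensor_metadata_matches

a = torch.empty_strided((2, 3), (3, 1))
b = torch.empty_strided((2, 3), (3, 1))
check_tensor_metadata_matches(a, b, lambda: "demo")  # identical metadata: passes

c = torch.empty_strided((2, 3), (1, 2))  # column-major-style strides
# check_tensor_metadata_matches(a, c, lambda: "demo")  # would raise AssertionError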
+ def check_metadata_matches(n, r, desc):
+     assert callable(desc)
+     n_vals, _n_spec = pytree.tree_flatten(n)
+     r_vals, _r_spec = pytree.tree_flatten(r)
+     # TODO: test the specs match; empirically sometimes we have a tuple
+     # on one side and a list on the other
+     assert len(n_vals) == len(r_vals), f"{len(n_vals)} != {len(r_vals)}"
+     for i, nv, rv in zip(range(len(n_vals)), n_vals, r_vals):
+         if not isinstance(rv, torch.Tensor):
+             continue
+         check_tensor_metadata_matches(nv, rv, lambda: f"{desc()} output {i}")
+
+
+ class Lit:
+     # Wraps a string so that repr() returns it verbatim; used by _fmt below to
+     # render tensors as constructor expressions rather than tensor contents.
+     def __init__(self, s):
+         self.s = s
+
+     def __repr__(self):
+         return self.s
+
+
+ def _fmt(a: object) -> object:
+     if isinstance(a, torch.Tensor):
+         return Lit(
+             f"torch.empty_strided({tuple(a.size())}, {a.stride()}, dtype={a.dtype})"
+         )
+     else:
+         return a
+
+
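Lit and _fmt cooperate so error messages render tensors as reproducible constructor calls instead of dumping their contents; for example:

import torch
from torch._dispatch.python import _fmt

t = torch.empty_strided((2, 3), (3, 1))
print(repr(_fmt(t)))  # torch.empty_strided((2, 3), (3, 1), dtype=torch.float32)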
+ def make_crossref_functionalize(
+     op: torch._ops.OpOverload[_P, _T], final_key: DispatchKey
+ ) -> Union[Callable[_P, _T], DispatchKey]:
+     from torch._subclasses.fake_tensor import FakeTensorMode
+
+     # This case is pretty weird, suppress it for now
+     if op == torch.ops.aten.lift_fresh.default:
+         return final_key
+
+     def handler(*args: _P.args, **kwargs: _P.kwargs) -> _T:
+         fake_mode = FakeTensorMode()
+
+         def fakeify_defun(t):
+             if isinstance(t, torch.Tensor):
+                 if torch._is_functional_tensor(t):
+                     r = torch._from_functional_tensor(t)
+                     # NB: This assumes that the inner tensor sizes/strides match
+                     # the outer tensor sizes/strides. This doesn't necessarily
+                     # have to be the case, see discussion at
+                     # https://github.com/pytorch/pytorch/pull/87610/files/401ddeda1d769bedc88a12de332c7357b60e51a4#r1007264456
+                     assert t.size() == r.size()
+                     assert t.stride() == r.stride()
+                 else:
+                     r = t
+                 # TODO: suppress guards
+                 return fake_mode.from_tensor(r)
+             return t
+
+         def maybe_detach(t):
+             if isinstance(t, torch.Tensor):
+                 return t.detach()
+             else:
+                 return t
+
+         # TODO: This probably does the wrong thing if you're running other
+         # substantive modes with the normal op outside here
+         with (
+             torch.utils._python_dispatch._disable_current_modes(),
+             suspend_functionalization(),
+         ):
+             f_args, f_kwargs = pytree.tree_map(fakeify_defun, (args, kwargs))
+             orig_f_args, orig_f_kwargs = pytree.tree_map(
+                 maybe_detach, (f_args, f_kwargs)
+             )
+             with fake_mode:
+                 f_r = op(*f_args, **f_kwargs)
+         r = op._op_dk(final_key, *args, **kwargs)
+
+         def desc():
+             fmt_args = ", ".join(
+                 itertools.chain(
+                     (repr(pytree.tree_map(_fmt, a)) for a in orig_f_args),
+                     (
+                         f"{k}={pytree.tree_map(_fmt, v)}"
+                         for k, v in orig_f_kwargs.items()
+                     ),
+                 )
+             )
+             return f"{op}({fmt_args})"
+
+         check_metadata_matches(f_r, r, desc)
+         return r
+
+     return handler
+
+
+ # NB: enabling this is slow, don't do it in a hot loop. This is purely
+ # for debugging purposes.
+ @contextmanager
+ def enable_crossref_functionalize():
+     for op in all_py_loaded_overloads():
+         op._uncache_dispatch(torch._C.DispatchKey.Functionalize)
+     try:
+         with (
+             enable_python_dispatcher(),
+             unittest.mock.patch("torch._dispatch.python.CROSSREF_FUNCTIONALIZE", True),
+         ):
+             yield
+     finally:
+         for op in all_py_loaded_overloads():
+             op._uncache_dispatch(torch._C.DispatchKey.Functionalize)
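Putting it together, a hedged usage sketch: the cross-checks fire for ops dispatched through the Functionalize key, e.g. under torch.func.functionalize, and the whole thing is debug-only:

import torch
from torch._dispatch.python import enable_crossref_functionalize

def f(x):
    return x.add(1)

with enable_crossref_functionalize():
    out = torch.func.functionalize(f)(torch.randn(4))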