koichi12 commited on
Commit
5dbc224
·
verified ·
1 Parent(s): 9a8eae1

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_heuristic.so.9 +3 -0
  3. .venv/lib/python3.11/site-packages/torch/_export/converter.py +1584 -0
  4. .venv/lib/python3.11/site-packages/torch/_export/non_strict_utils.py +523 -0
  5. .venv/lib/python3.11/site-packages/torch/_export/pass_base.py +441 -0
  6. .venv/lib/python3.11/site-packages/torch/_export/tools.py +146 -0
  7. .venv/lib/python3.11/site-packages/torch/_export/verifier.py +456 -0
  8. .venv/lib/python3.11/site-packages/torch/_export/wrappers.py +121 -0
  9. .venv/lib/python3.11/site-packages/torch/_lazy/__init__.py +55 -0
  10. .venv/lib/python3.11/site-packages/torch/_lazy/__pycache__/ir_cache.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/torch/_lazy/__pycache__/ts_backend.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/torch/_lazy/computation.py +27 -0
  13. .venv/lib/python3.11/site-packages/torch/_lazy/config.py +17 -0
  14. .venv/lib/python3.11/site-packages/torch/_lazy/debug.py +22 -0
  15. .venv/lib/python3.11/site-packages/torch/_lazy/device_context.py +26 -0
  16. .venv/lib/python3.11/site-packages/torch/_lazy/extract_compiled_graph.py +225 -0
  17. .venv/lib/python3.11/site-packages/torch/_lazy/metrics.py +22 -0
  18. .venv/lib/python3.11/site-packages/torch/_lazy/ts_backend.py +7 -0
  19. .venv/lib/python3.11/site-packages/torch/multiprocessing/_atfork.py +35 -0
  20. .venv/lib/python3.11/site-packages/torch/multiprocessing/pool.py +52 -0
  21. .venv/lib/python3.11/site-packages/torch/multiprocessing/queue.py +43 -0
  22. .venv/lib/python3.11/site-packages/torch/multiprocessing/reductions.py +647 -0
  23. .venv/lib/python3.11/site-packages/torch/multiprocessing/spawn.py +328 -0
  24. .venv/lib/python3.11/site-packages/torch/nn/quantizable/__pycache__/__init__.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__init__.py +9 -0
  26. .venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/modules/__pycache__/conv.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/torch/nn/utils/__init__.py +39 -0
  34. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/_deprecation_utils.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/_named_member_accessor.cpython-311.pyc +0 -0
  37. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/_per_sample_grad.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/clip_grad.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/convert_parameters.cpython-311.pyc +0 -0
  40. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/fusion.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/init.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/memory_format.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/parametrizations.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/parametrize.cpython-311.pyc +0 -0
  45. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/prune.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/rnn.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/spectral_norm.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/stateless.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/weight_norm.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/torch/nn/utils/_deprecation_utils.py +54 -0
.gitattributes CHANGED
@@ -123,3 +123,5 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_
123
  .venv/lib/python3.11/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12 filter=lfs diff=lfs merge=lfs -text
124
  .venv/lib/python3.11/site-packages/nvidia/cublas/lib/libnvblas.so.12 filter=lfs diff=lfs merge=lfs -text
125
  .venv/lib/python3.11/site-packages/opencv_python_headless.libs/libopenblas-r0-f650aae0.3.3.so filter=lfs diff=lfs merge=lfs -text
 
 
 
123
  .venv/lib/python3.11/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12 filter=lfs diff=lfs merge=lfs -text
124
  .venv/lib/python3.11/site-packages/nvidia/cublas/lib/libnvblas.so.12 filter=lfs diff=lfs merge=lfs -text
125
  .venv/lib/python3.11/site-packages/opencv_python_headless.libs/libopenblas-r0-f650aae0.3.3.so filter=lfs diff=lfs merge=lfs -text
126
+ .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_heuristic.so.9 filter=lfs diff=lfs merge=lfs -text
127
+ .venv/lib/python3.11/site-packages/vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so filter=lfs diff=lfs merge=lfs -text
.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_heuristic.so.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94fab98c15040558c3c80f2c1a2f5fda9baa72afc39a88bdcc82185f49d241c3
3
+ size 86326864
.venv/lib/python3.11/site-packages/torch/_export/converter.py ADDED
@@ -0,0 +1,1584 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import builtins
3
+ import logging
4
+ import operator
5
+ import typing
6
+ import warnings
7
+ from contextlib import contextmanager
8
+ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
9
+
10
+ import torch
11
+ import torch.export._trace
12
+ from torch import _C
13
+ from torch._export.passes.replace_quantized_ops_with_standard_ops_pass import (
14
+ replace_quantized_ops_with_standard_ops,
15
+ )
16
+ from torch.export.exported_program import ExportedProgram
17
+ from torch.export.graph_signature import (
18
+ ConstantArgument,
19
+ CustomObjArgument,
20
+ InputKind,
21
+ InputSpec,
22
+ OutputKind,
23
+ OutputSpec,
24
+ TensorArgument,
25
+ )
26
+ from torch.fx import subgraph_rewriter
27
+
28
+
29
+ log = logging.getLogger(__name__)
30
+
31
+
32
+ def _get_param_count_list(method_graph, args_params):
33
+ param_count_list = []
34
+ for input_, arg_params_ in zip(method_graph.inputs(), args_params):
35
+ if "PackedParams" in str(input_.type()):
36
+ in_vars, _ = torch.jit._flatten(arg_params_)
37
+ param_count_list.append(len(in_vars))
38
+ else:
39
+ param_count_list.append(arg_params_ is not None)
40
+
41
+ return param_count_list
42
+
43
+
44
+ def _trace_and_get_graph_from_model(model, args):
45
+ # A basic sanity check: make sure the state_dict keys are the same
46
+ # before and after running the model. Fail fast!
47
+ orig_state_dict_keys = torch.jit._unique_state_dict(model).keys()
48
+
49
+ # Disable Autocast cache because it replaces kernel's weight and bias
50
+ # by (undesired) constants.
51
+ # No perf impact for when there are reused weights since https://github.com/pytorch/pytorch/pull/85665
52
+ prev_autocast_cache_enabled = torch.is_autocast_cache_enabled()
53
+ torch.set_autocast_cache_enabled(False)
54
+ trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
55
+ model,
56
+ args,
57
+ strict=False,
58
+ _force_outplace=False,
59
+ _return_inputs_states=True,
60
+ )
61
+ torch.set_autocast_cache_enabled(prev_autocast_cache_enabled)
62
+
63
+ if orig_state_dict_keys != torch.jit._unique_state_dict(model).keys():
64
+ raise RuntimeError(
65
+ "state_dict changed after running the tracer; "
66
+ "something weird is happening in your model!"
67
+ )
68
+
69
+ return trace_graph, torch_out
70
+
71
+
72
def _create_jit_graph(
    model: Union[torch.nn.Module, torch.jit.ScriptFunction], args: Sequence[Any]
) -> Tuple[torch.Graph, List["_C.IValue"], Any, Optional[torch.ScriptModule]]:
    """Produce a TorchScript graph for ``model`` with input shapes propagated.

    Returns ``(graph, params, torch_out, module)``; ``module`` is only
    populated on the ScriptModule path, ``torch_out`` only on the tracing
    path for plain ``nn.Module``s.
    """
    if isinstance(model, (torch.jit.ScriptFunction, torch.jit.ScriptModule)):
        flattened_args = tuple(torch.jit._flatten(tuple(args))[0])
        torch_out = None

        if isinstance(model, torch.jit.ScriptModule):
            try:
                graph = model.forward.graph  # type: ignore[attr-defined]
            except AttributeError as e:
                raise RuntimeError("'forward' method must be a script method") from e
            _C._jit_pass_onnx_function_substitution(graph)
            # Freeze to fold submodule state, then re-list parameters so they
            # can be appended to the user args for shape propagation.
            freezed_module = _C._freeze_module(
                typing.cast(_C.ScriptModule, model._c), preserveParameters=True
            )
            module, params = _C._jit_onnx_list_model_parameters(freezed_module)
            method_graph = module._get_method("forward").graph
            args_params = tuple(args) + tuple(params)
            param_count_list = _get_param_count_list(method_graph, args_params)
            in_vars, _ = torch.jit._flatten(args_params)
            graph = _C._propagate_and_assign_input_shapes(
                method_graph, tuple(in_vars), param_count_list, False, False
            )
            return graph, params, torch_out, module

        # torch.jit.ScriptFunction
        params = []
        graph = model.graph
        _C._jit_pass_onnx_function_substitution(graph)
        param_count_list = _get_param_count_list(graph, args)
        graph = _C._propagate_and_assign_input_shapes(
            graph, flattened_args, param_count_list, False, False
        )
        return graph, params, torch_out, None

    # Plain nn.Module: trace it, then rename the trailing graph inputs to the
    # state_dict keys they correspond to (tracer appends params after user inputs).
    graph, torch_out = _trace_and_get_graph_from_model(model, args)
    _C._jit_pass_onnx_lint(graph)
    state_dict = torch.jit._unique_state_dict(model)
    params = list(state_dict.values())
    graph_inputs = list(graph.inputs())
    user_input_num = len(graph_inputs) - len(state_dict)
    param_names = list(state_dict.keys())
    for i, inp in enumerate(graph_inputs):
        if i >= user_input_num:
            inp.setDebugName(param_names[i - user_input_num])
    _C._jit_pass_onnx_function_substitution(graph)
    return graph, params, torch_out, None
120
+
121
+
122
def list_add(a, b):
    """Return ``a + b`` (models TorchScript list concatenation)."""
    combined = a + b
    return combined
124
+
125
+
126
def list_append(container, element):
    """Return a new list equal to ``container`` with ``element`` appended.

    Out-of-place: the input container is never mutated.
    """
    appended = container + [element]
    return appended
128
+
129
+
130
def execute_subgraph_from_prim_loop(
    subgraph, iter_idx, len_loop_local_arguments, *args, **kwargs
):
    """Run one iteration of a converted prim::Loop body.

    Args:
        subgraph: GraphModule converted from the loop's sub-block.
        iter_idx: The index of the current iteration.
        len_loop_local_arguments: How many leading entries of ``args`` are
            loop-local (carried) values; the remainder are captured globals.
    """
    # Loop-local variables come first: the TS graph models them as block
    # inputs because they are rebound on every iteration.
    loop_locals = args[:len_loop_local_arguments]
    # Captured globals are used directly by the sub-block; they are usually
    # read-only, except when an operation mutates them in place.
    captured = args[len_loop_local_arguments:]
    return subgraph(*captured, iter_idx, *loop_locals, **kwargs)
148
+
149
+
150
def inplace_optimize_sym_size_div(gm: torch.fx.GraphModule):
    """Rewrite ``Int(div(scalar_tensor(sym_size(im, dim)), scale, trunc))``
    patterns in ``gm`` (in place) into the direct symbolic ``sym_size // scale``.
    """

    def pattern(im, dim, scale):
        sym_size_int = torch.ops.aten.sym_size.int(im, dim)
        scalar_tensor = torch.ops.aten.scalar_tensor(sym_size_int)
        div_scalar_mode = torch.ops.aten.div.Scalar_mode(
            scalar_tensor, scale, rounding_mode="trunc"
        )
        int_tensor = torch.ops.aten.Int.Tensor(div_scalar_mode)
        return int_tensor

    def replacement(im, dim, scale):
        sym_size_int = torch.ops.aten.sym_size.int(im, dim)
        return sym_size_int // scale

    # The list of matches is not needed; the rewrite happens in place.
    subgraph_rewriter.replace_pattern(gm, pattern, replacement)
165
+
166
+
167
def is_valid_for_codegen(name):
    """Return True when ``name`` may appear as a generated identifier.

    Names starting with a digit are invalid; an empty name is an error.
    """
    if not name:
        raise RuntimeError("Empty argument name for codegen")
    return not name[0].isdigit()
173
+
174
+
175
def normalize_name(name: str, prefix: str = "rename") -> str:
    """Replace dots with underscores; prepend ``prefix`` when the result
    would not be valid for codegen (i.e. it starts with a digit)."""
    sanitized = name.replace(".", "_")
    if len(sanitized) == 0:
        raise RuntimeError("Empty argument name for codegen")
    if sanitized[0].isdigit():
        return f"{prefix}_{sanitized}"
    return sanitized
180
+
181
+
182
def ir_name_to_func_name(name: str) -> str:
    """Map an IR op kind to a handler name: prim::If -> convert_prim_If."""
    return "convert_" + name.replace("::", "_")
186
+
187
+
188
def get_node_as_placeholder_or_get_attr(fx_graph, name, is_top_level_graph):
    """Top-level graphs reference attributes via get_attr; sub-blocks
    receive them as placeholders (lifted inputs)."""
    factory = fx_graph.get_attr if is_top_level_graph else fx_graph.placeholder
    return factory(name)
192
+
193
+
194
# Mapping from torch dtype to its integer code as used by prim::dtype.
# NOTE(review): value 14 is skipped here — presumably a dtype this converter
# does not support; confirm against c10's ScalarType enum.
_TORCH_DTYPE_TO_ENUM = {
    torch.uint8: 0,
    torch.int8: 1,
    torch.int16: 2,
    torch.int32: 3,
    torch.int64: 4,
    torch.float16: 5,
    torch.float32: 6,
    torch.float64: 7,
    torch.complex32: 8,
    torch.complex64: 9,
    torch.complex128: 10,
    torch.bool: 11,
    torch.qint8: 12,
    torch.quint8: 13,
    torch.bfloat16: 15,
}

# Inverse mapping: integer dtype code -> torch dtype.
_TORCH_ENUM_TO_DTYPE = {value: key for key, value in _TORCH_DTYPE_TO_ENUM.items()}
213
+
214
+
215
def get_dtype_as_int(tensor):
    """Implement prim::dtype ("(Tensor a) -> int"): return the integer code
    (per the ScalarType enum) for ``tensor``'s dtype."""
    dtype = tensor.dtype
    code = _TORCH_DTYPE_TO_ENUM.get(dtype)
    if code is None:
        raise RuntimeError(f"Unsupported dtype {dtype}")
    return code
225
+
226
+
227
# Those operators will be automatically populated to an instance method
# of TS2FXGraphConverter with name convert_<namespace>_<opname>().
# Please check __init__ for the method population implementation.
kind_to_standard_operators = {
    "prim::max": builtins.max,
    "prim::min": builtins.min,
    "prim::TupleIndex": operator.getitem,
    "aten::__is__": operator.is_,
    "aten::__isnot__": operator.is_not,
    "aten::__not__": operator.not_,
    "aten::__contains__": operator.contains,
    "prim::dtype": get_dtype_as_int,
    "aten::len": len,
    # Mapping from specialized op to its symbolic counterpart.
    # They currently do not have any other overrides.
    "aten::numel": torch.ops.aten.sym_numel,
    "aten::size": torch.ops.aten.sym_size,
    "aten::storage_offset": torch.ops.aten.sym_storage_offset,
    "aten::stride": torch.ops.aten.sym_stride,
}
247
+
248
+
249
def get_ir_value_parent_name_and_attr_name(node):
    """Unpack a prim::GetAttr node into (output name, input name, attr name)."""
    irv_name = node.output().debugName()
    irv_parent_name = node.input().debugName()
    attr_name = node.s("name")
    return irv_name, irv_parent_name, attr_name
253
+
254
+
255
def construct_fqn(ir, ref_map, name_map):
    """Follow the GetAttr reference chain from ``ir`` up to the root and
    join the attribute names into a dotted fully-qualified name."""
    parts = []
    current = ir
    while current in ref_map:
        parts.insert(0, name_map[current])
        current = ref_map[current]
    return ".".join(parts)
261
+
262
+
263
def get_block_to_lifted_attrs(graph: torch._C.Graph) -> Dict[torch._C.Block, Set[str]]:
    """
    Perform two passes to get a mapping of blocks to a set of FQNs of its lifted attributes.
    When a graph has control flow, the graph will be divided into multiple blocks. We want to convert
    each block to a graph which will be passed into torch.cond. A restriction for torch.cond is that model
    parameters/buffers are expected to be lifted as inputs to the subgraphs. Before converting the model,
    we will run this pass which will:
        1. Figure out which params/buffers are used within blocks through tracing the GetAttr calls.
        2. Process the graph bottom up to find the lifted attributes of each block by taking the union
        of the attributes used in the current block, and the lifted attributes of all its child blocks.

    Returns:
        A mapping of blocks to a set of FQNs of its lifted attributes.
    """

    # A map from a block to its expected to be lifted arguments.
    blocks_to_lifted_attrs: Dict[torch._C.Block, Set[str]] = {}

    # Reference map stores the input (i.e., src) and output (i.e., dest) IR of a
    # GetAttr node. By traversing this reference map, we can figure out the
    # full IR aliasing pass and figure out the FQN of an attribute.
    # E.g., %2 = GetAttr(linear)[%1] --> node_to_parent_map["%2"] = "%1"
    node_to_parent_map: Dict[str, str] = {}

    # Used for reconstructing the FQN of an attribute based on the reference map.
    # In nutshell, for each GetAttr call, GetAttr(input IR, attribute name) -> output IR
    # This name map stores which attribute name is called for a src IR --> dest IR action.
    # E.g., %2 = GetAttr(linear)[%1] --> node_to_attr_name["%2"] = "linear"
    node_to_attr_name: Dict[str, str] = {}

    def _dfs_get_attr_dependency(entry):
        """
        First DFS path to construct reference map and name map.
        """
        for node in entry.nodes():
            if node.kind() == "prim::GetAttr":
                (
                    irv_name,
                    irv_parent_name,
                    attr_name,
                ) = get_ir_value_parent_name_and_attr_name(node)
                node_to_parent_map[irv_name] = irv_parent_name
                node_to_attr_name[irv_name] = attr_name
            # Recurse into control-flow sub-blocks.
            for block in node.blocks():
                _dfs_get_attr_dependency(block)

    def _map_blocks_to_lifted_attrs(entry):
        """
        Walk the graph in a bottom-up fashion to build the expected to be
        lifted arguments for each block.
        """
        arguments: Set[str] = set()
        for node in entry.nodes():
            for block in node.blocks():
                # Recursively build.
                arguments = arguments.union(_map_blocks_to_lifted_attrs(block))
            if node.kind() == "prim::GetAttr":
                irv_name = node.output().debugName()
                # Skip for intermediate GetAttr, which will anyway not result a FQN.
                # E.g., node_to_parent_name: {"%3": "%2", "%2": "%1"}
                #       node_to_attr_name: {"%3": "weight", "%2": "linear", "%1": "self"}
                # There is only one FQN %3-->%2-->%1: self.linear.weight
                # %2-->%1 is not a FQN: self.linear
                if irv_name not in set(node_to_parent_map.values()):
                    arguments.add(
                        construct_fqn(irv_name, node_to_parent_map, node_to_attr_name)
                    )
        if not isinstance(entry, torch._C.Graph):  # Skip the top level.
            blocks_to_lifted_attrs[entry] = arguments
        return arguments

    _dfs_get_attr_dependency(graph)
    _map_blocks_to_lifted_attrs(graph)

    return blocks_to_lifted_attrs
338
+
339
+
340
def get_attribute_fqn_from_ts_node(
    name_to_attribute_fqn: Dict[str, str], node: torch._C.Node
) -> str:
    """Build the dotted FQN for a prim::GetAttr / prim::SetAttr node by
    joining the already-resolved root FQN with this node's attribute name."""
    kind = node.kind()
    if kind == "prim::SetAttr":
        input_name = next(node.inputs()).debugName()
    elif kind == "prim::GetAttr":
        input_name = node.input().debugName()
    else:
        raise RuntimeError(
            f"Unexpected node kind when getting attribute fqn. node: {node} "
        )

    attr_name = node.s("name")
    if input_name not in name_to_attribute_fqn:
        raise ValueError(f"Attribute {input_name} not found")
    root_attr_name = name_to_attribute_fqn[input_name]
    # An empty root means the attribute hangs directly off the module root.
    return f"{root_attr_name}.{attr_name}" if root_attr_name else attr_name
363
+
364
+
365
def get_op_overload(node: torch._C.Node):
    """Resolve a TS node's schema to the concrete ``torch.ops`` OpOverload."""
    schema_str = node.schema()
    assert schema_str != "(no schema)", f"got empty schema for {node}"
    schema: torch._C.FunctionSchema = torch._C.parse_schema(schema_str)
    namespace, op_name = str(schema.name).split("::")
    overload_name = schema.overload_name

    try:
        packet = getattr(getattr(torch.ops, namespace), op_name)
        # An empty overload name selects the default overload.
        op_overload = (
            getattr(packet, overload_name) if overload_name else packet.default
        )
    except Exception as e:
        raise RuntimeError(
            f"Unable to find operator {node.kind()} with schema {node.schema()}"
        ) from e

    return op_overload
385
+
386
+
387
+ class TS2FXGraphConverter:
388
    def __init__(
        self,
        ts_graph: Union[torch._C.Graph, torch._C.Block],
        name_to_param: Dict[str, torch.Tensor],
        name_to_buffer: Dict[str, torch.Tensor],
        blocks_to_lifted_attrs: Dict[torch._C.Block, Set[str]],
        name_to_non_tensor_attribute: Dict[str, Any],
        name_to_constant: Dict[str, Any],
    ):
        """Set up converter state for translating one TS graph/block to FX."""
        self.ts_graph = ts_graph
        # Parameters/buffers of the original module, keyed by FQN.
        self.name_to_param = name_to_param
        self.name_to_buffer = name_to_buffer

        self.fx_graph: torch.fx.Graph = torch.fx.Graph()
        self.input_specs: List[InputSpec] = []
        self.output_specs: List[OutputSpec] = []

        # Maps a TS value debug name to the FX node(s) produced for it.
        self.name_to_node: Dict[
            str, Union[torch.fx.Node, List[torch.fx.Node], Dict[Any, torch.fx.Node]]
        ] = {}
        self.name_to_constant: Dict[str, Any] = name_to_constant

        # Mapping from torchscript node output name to attribute fully qualified name
        self.name_to_attribute_fqn: Dict[str, str] = {}

        # Mapping from fully qualified name to real values or a fx graph node
        # During convert, this represents the current value of a non-tensor attribute
        # One use case is:
        # def forward(self, x):
        #     c1 = self.count
        #     self.count += 1
        #     c2 = self.count
        #     return x + c1 + c2
        self.name_to_non_tensor_attribute_node: Dict[str, Any] = {}

        # Mapping from fully qualified name to initial real values inputs
        # We separate it from self.name_to_non_tensor_attribute_node since
        # we need initial real value input when we construct fx.GraphModule
        self.name_to_non_tensor_attribute: Dict[str, Any] = name_to_non_tensor_attribute

        self.subgraphs: Dict[str, torch.fx.GraphModule] = {}

        self.blocks_to_lifted_attrs = blocks_to_lifted_attrs

        # Populate methods for the standard operators.
        for k in kind_to_standard_operators.keys():
            handler_func_name = ir_name_to_func_name(k)
            # Create an indirect function call:
            # convert_<namespace>_<opname> --> lambda node: _convert_standard_operator(node)
            # (all handlers share one dispatcher; the op is re-derived from the node)
            setattr(
                self,
                handler_func_name,
                lambda node: self._convert_standard_operators(node),
            )

        # This stores a list of return results that do not appear in the original TS
        # graph's outputs. The reason we maintain this is because some operations in the sub-block
        # might have inplace updates to the variable defined in the parent fx graph. After
        # the execution of that sub-block, the variable defined in the parent fx graph also
        # needs to be updated.
        self.name_update_from_subblock_to_parent: Set[str] = set()
449
+
450
    def _is_get_attr_node(self, fqn):
        """True when ``fqn`` names a param, buffer, or ScriptObject constant —
        i.e. something that becomes a get_attr node in the FX graph."""
        return (
            fqn in self.name_to_buffer
            or fqn in self.name_to_param
            or (
                fqn in self.name_to_constant
                and isinstance(self.name_to_constant[fqn], torch.ScriptObject)
            )
        )
459
+
460
    def _convert_block_to_subgraph(self, node: torch._C.Node, arguments: List[str]):
        """Convert each sub-block of ``node`` into its own fx.GraphModule.

        Each block gets a fresh converter; every name in ``arguments`` becomes
        a placeholder of the sub-graph. Returns the get_attr nodes referencing
        the registered subgraphs and the converters used, in block order.
        """
        subgraph_nodes, subgraph_converters = [], []
        for block in node.blocks():
            subgraph_converter = TS2FXGraphConverter(
                block,
                self.name_to_param,
                self.name_to_buffer,
                self.blocks_to_lifted_attrs,
                {},
                self.name_to_constant,
            )
            # Share the attribute-FQN map so GetAttr chains resolve inside blocks.
            subgraph_converter.name_to_attribute_fqn = self.name_to_attribute_fqn

            for block_arg in arguments:
                normalized_block_arg_name = normalize_name(block_arg)
                placeholder_node = subgraph_converter.fx_graph.placeholder(
                    normalized_block_arg_name
                )
                subgraph_converter.name_to_node[block_arg] = placeholder_node

            subgraph = subgraph_converter.convert()
            subgraph_name = self.add_subgraph(subgraph)
            subgraph_nodes.append(self.fx_graph.get_attr(subgraph_name))
            subgraph_converters.append(subgraph_converter)
        return subgraph_nodes, subgraph_converters
485
+
486
    def _identify_inputs_as_arguments(self, entry):
        """
        Identify inputs from the innermost sub-block. This is needed
        for nested sub-blocks when the input is hidden in the nested sub-block.
        E.g., example IR of input is hidden in the nested sub-block.
        Graph[x.1]
          %1 = ...
          Block[]
            Block[x.1]
              %2 = x.1 ...
        """
        arguments: Set[str] = set()
        for block in entry.blocks():
            for block_node in block.nodes():
                for block_node_in in block_node.inputs():
                    # Only values already materialized in the parent graph and
                    # not resolvable as attribute FQNs need to be passed in.
                    if (
                        block_node_in.debugName() in self.name_to_node
                        and block_node_in.debugName() not in self.name_to_attribute_fqn
                    ):
                        arguments.add(block_node_in.debugName())
                # Recurse for inputs hidden in deeper nested sub-blocks.
                arguments = arguments.union(
                    self._identify_inputs_as_arguments(block_node)
                )
        return arguments
510
+
511
    def is_top_level_graph(self):
        """Sub-blocks are torch._C.Block; only the root is a torch._C.Graph."""
        return isinstance(self.ts_graph, torch._C.Graph)
513
+
514
    def add_subgraph(self, subgraph) -> str:
        """Register ``subgraph`` under a fresh sequential name and return the name."""
        name = f"subgraph_{len(self.subgraphs)}"
        self.subgraphs[name] = subgraph
        return name
518
+
519
    def get_args_kwargs(self, node: torch._C.Node, schema):
        """Split a TS node's inputs into positional args and kwargs according
        to the operator ``schema`` (kwarg_only arguments become kwargs)."""
        args = []
        kwargs = {}
        for input, schema_arg in zip(node.inputs(), schema.arguments):
            if schema_arg.kwarg_only:
                kwargs[schema_arg.name] = self.get_fx_value_by_ir_value(input)
            else:
                args.append(self.get_fx_value_by_ir_value(input))

        return tuple(args), kwargs
529
+
530
    def get_fx_value_by_ir_value(self, value: torch._C.Value):
        """Map a TS IR value to its FX node or constant Python value.

        ScriptObject constants are materialized as get_attr nodes; other
        constants are returned as plain values.
        """
        value_name = value.debugName()

        if value_name in self.name_to_node:
            input_node = self.name_to_node[value_name]
            return input_node
        elif value_name in self.name_to_constant:
            if isinstance(self.name_to_constant[value_name], torch.ScriptObject):
                return self.fx_graph.get_attr(value_name)
            return self.name_to_constant[value_name]
        else:
            raise ValueError(f"Input {value_name} not found")
542
+
543
    def get_fx_value_by_fqn(self, name):
        """Look up a value by FQN across nodes, constants, and (current, then
        initial) non-tensor attributes — in that priority order."""
        if name in self.name_to_node:
            fx_node = self.name_to_node[name]
        elif name in self.name_to_constant:
            fx_node = self.name_to_constant[name]
        elif name in self.name_to_non_tensor_attribute_node:
            fx_node = self.name_to_non_tensor_attribute_node[name]
        elif name in self.name_to_non_tensor_attribute:
            fx_node = self.name_to_non_tensor_attribute[name]
        else:
            raise ValueError(f"Attribute {name} not found")
        return fx_node
555
+
556
    def convert(self) -> torch.fx.GraphModule:
        """Convert the whole TS graph/block into a linted fx.GraphModule."""
        self.convert_graph_inputs()

        for node in self.ts_graph.nodes():
            self.convert_node(node)

        self.convert_graph_outputs()

        # Pass parameter and buffer to the root for lookup.
        gm = torch.fx.GraphModule(
            {
                **self.subgraphs,
                **self.name_to_param,
                **self.name_to_buffer,
                **self.name_to_non_tensor_attribute,
                **self.name_to_constant,
            },
            self.fx_graph,
        )

        # Fold the scalar_tensor/div/Int chain back to symbolic floor-division.
        inplace_optimize_sym_size_div(gm)

        gm.graph.lint()

        return gm
581
+
582
    def convert_graph_inputs(self):
        """Create FX placeholder/get_attr nodes and InputSpecs for every TS
        graph input (parameters, buffers, ScriptObject constants, user inputs)."""
        for graph_input in self.ts_graph.inputs():
            name = graph_input.debugName()

            if name in self.name_to_param:
                normalized_name = normalize_name(name)
                self.input_specs.append(
                    InputSpec(
                        InputKind.PARAMETER,
                        arg=TensorArgument(name=normalized_name),
                        target=name,
                    )
                )
                fx_node = get_node_as_placeholder_or_get_attr(
                    self.fx_graph, name, self.is_top_level_graph()
                )
            elif name in self.name_to_buffer:
                normalized_name = normalize_name(name)
                self.input_specs.append(
                    InputSpec(
                        InputKind.BUFFER,
                        arg=TensorArgument(name=normalized_name),
                        target=name,
                        persistent=True,
                    )
                )
                fx_node = get_node_as_placeholder_or_get_attr(
                    self.fx_graph, name, self.is_top_level_graph()
                )
            elif name in self.name_to_constant:
                assert isinstance(
                    self.name_to_constant[name], torch.ScriptObject
                ), "Input conversion only handles ScriptObject"
                normalized_name = normalize_name(name)
                self.input_specs.append(
                    InputSpec(
                        InputKind.CUSTOM_OBJ,
                        arg=CustomObjArgument(
                            name=normalized_name, class_fqn=normalized_name
                        ),
                        target=name,
                        persistent=False,
                    )
                )
                fx_node = get_node_as_placeholder_or_get_attr(
                    self.fx_graph, name, self.is_top_level_graph()
                )
            elif isinstance(graph_input.type(), torch.ClassType):
                # Directly skip inputs that are ScriptObject but not used in the graph.
                continue
            else:
                normalized_name = normalize_name(name, prefix="input")
                self.input_specs.append(
                    InputSpec(
                        InputKind.USER_INPUT,
                        arg=TensorArgument(name=normalized_name),
                        target=name,
                    )
                )
                fx_node = self.fx_graph.placeholder(normalized_name)

            self.name_to_node[name] = fx_node
644
+
645
+ def convert_aten_Float(self, node: torch._C.Node):
646
+ def to_float_tensor(t):
647
+ return t.to(dtype=torch.float).item()
648
+
649
+ inp_list = [
650
+ self.get_fx_value_by_ir_value(inp) for inp in node.inputs()
651
+ ] # noqa: C416
652
+ fx_node = self.fx_graph.call_function(
653
+ to_float_tensor,
654
+ tuple(inp_list),
655
+ )
656
+ self.name_to_node[node.output().debugName()] = fx_node
657
+
658
+ def convert_aten_tensor(self, node: torch._C.Node):
659
+ """aten::tensor creates a constant tensor ad-hoc --> GetAttr"""
660
+ args, kwargs = self.get_args_kwargs(node, torch.ops.aten.tensor.default._schema)
661
+
662
+ for k in kwargs:
663
+ if k == "requires_grad":
664
+ kwargs[k] = bool(kwargs[k]) # 0 -> False, 1 -> True
665
+
666
+ to_tensor = (
667
+ torch.tensor
668
+ if all(isinstance(a, int) for a in args)
669
+ else torch._refs.tensor
670
+ )
671
+
672
+ def target(*args, **kwargs):
673
+ if "dtype" in kwargs and kwargs["dtype"] is not None:
674
+ kwargs["dtype"] = _TORCH_ENUM_TO_DTYPE[kwargs["dtype"]]
675
+ return to_tensor(*args, **kwargs)
676
+
677
+ # def to_dynamic_tensor(*args, **kwargs):
678
+ # if "dtype" in kwargs and kwargs["dtype"] is not None:
679
+ # kwargs["dtype"] = _TORCH_ENUM_TO_DTYPE[kwargs["dtype"]]
680
+ # return torch._refs.tensor(*args, **kwargs)
681
+
682
+ output_name = node.output().debugName()
683
+ fx_node = self.fx_graph.call_function(target, args, kwargs)
684
+ self.name_to_node[output_name] = fx_node
685
+
686
+ def convert_aten_append(self, node: torch._C.Node):
687
+ # special handle python list append: "aten::append.t(t[](a!) self, t(c -> *) el) -> t[](a!)"
688
+
689
+ # inplace append to the list!! This is kinda crazy, as we are inplace mutating the list
690
+ # This makes the converter "non-functional", and the result depends on the order of the nodes being converter
691
+ # In a sense, the converter now becomes an stateful interpreter
692
+ warnings.warn(
693
+ "Converting aten::append.t, which is a inplace mutation of the list. "
694
+ "This makes the converter non-functional: the result depends on the order of the append nodes being converter!"
695
+ )
696
+
697
+ args = tuple(self.get_fx_value_by_ir_value(inp) for inp in node.inputs())
698
+ fx_node = self.fx_graph.call_function(list_append, args)
699
+ self.name_to_node[node.output().debugName()] = fx_node
700
+
701
+ # inplace mutate arg[0], which is the python list
702
+ self.name_to_node[node.inputsAt(0).debugName()] = fx_node
703
+
704
+ # Variables that need to be updated to parent module.
705
+ if not self.is_top_level_graph() and args[0].op == "placeholder":
706
+ self.name_update_from_subblock_to_parent.add(node.inputsAt(0).debugName())
707
+
708
+ def convert_prim_Constant(self, node: torch._C.Node):
709
+ name = node.output().debugName()
710
+
711
+ value: Any = None
712
+ if node.hasAttribute("value"):
713
+ constant_kind = node.kindOf("value")
714
+ if constant_kind == "i":
715
+ value = node.i("value")
716
+ elif constant_kind == "f":
717
+ value = node.f("value")
718
+ elif constant_kind == "s":
719
+ value = node.s("value")
720
+ elif constant_kind == "t":
721
+ alias_name = (
722
+ f"lifted_tensor_{name}" # Follow naming convention from EP tracing.
723
+ )
724
+ fx_node = self.fx_graph.get_attr(alias_name)
725
+ self.name_to_node[name] = fx_node
726
+ name, value = alias_name, node.t("value")
727
+ elif constant_kind == "ival":
728
+ value = node.ival("value")
729
+ else:
730
+ raise ValueError(f"Unsupported constant type: {node.kindOf('value')}")
731
+ else:
732
+ value = None
733
+
734
+ self.name_to_constant[name] = value
735
+
736
+ def convert_prim_CallMethod(self, node: torch._C.Node):
737
+ inp_list = [
738
+ self.get_fx_value_by_ir_value(inp) for inp in node.inputs()
739
+ ] # noqa: C416
740
+ fx_node = self.fx_graph.call_method(
741
+ node.s("name"),
742
+ tuple(inp_list),
743
+ )
744
+ self.name_to_node[node.output().debugName()] = fx_node
745
+
746
+ def convert_prim_device(self, node: torch._C.Node):
747
+ input_type = node.input().type()
748
+ if input_type.isSubtypeOf(torch._C.TensorType.get()):
749
+ device = input_type.device() # type: ignore[attr-defined]
750
+ output_name = node.output().debugName()
751
+ self.name_to_constant[output_name] = device
752
+ else:
753
+ raise ValueError(f"Unsupported JitType ({input_type}) when get device")
754
+
755
+ def convert_prim_GetAttr(self, node: torch._C.Node):
756
+ # Build fully qulified name
757
+ attr_fqn = get_attribute_fqn_from_ts_node(self.name_to_attribute_fqn, node)
758
+ output_name = node.output().debugName()
759
+ self.name_to_attribute_fqn[output_name] = attr_fqn
760
+
761
+ if self.is_top_level_graph():
762
+ if self._is_get_attr_node(attr_fqn):
763
+ # We insert a get_attr node due to two reasons.
764
+ # First, ts graph does not lift tensor constants as input nodes. So
765
+ # tensor constants may be ignored by in convert_graph_inputs().
766
+ # Second, attr_fqn may have been written to via SetAttr. Two
767
+ # GetAttr may give different values.
768
+ self.name_to_node[output_name] = self.fx_graph.get_attr(attr_fqn)
769
+ else:
770
+ if attr_fqn not in self.name_to_non_tensor_attribute_node:
771
+ self.name_to_non_tensor_attribute_node[
772
+ attr_fqn
773
+ ] = self.name_to_non_tensor_attribute[attr_fqn]
774
+ self.name_to_node[output_name] = self.name_to_non_tensor_attribute_node[
775
+ attr_fqn
776
+ ]
777
+ else:
778
+ # Special support for if blocks which do not allow SetAttr TorchScript
779
+ # node and get_attr FX Graph Node.
780
+ if self._is_get_attr_node(attr_fqn):
781
+ self.name_to_node[output_name] = self.name_to_node[attr_fqn]
782
+
783
+ def convert_prim_SetAttr(self, node: torch._C.Node):
784
+ attr_fqn = get_attribute_fqn_from_ts_node(self.name_to_attribute_fqn, node)
785
+ attr_value = tuple(node.inputs())[1]
786
+ ts_graph_tensor_input = self.get_fx_value_by_ir_value(attr_value)
787
+ if self._is_get_attr_node(attr_fqn):
788
+ fx_attr_node = self.fx_graph.get_attr(attr_fqn)
789
+ self.fx_graph.call_function(
790
+ torch.Tensor.copy_, (fx_attr_node, ts_graph_tensor_input)
791
+ )
792
+ else:
793
+ self.name_to_non_tensor_attribute_node[attr_fqn] = ts_graph_tensor_input
794
+
795
+ def convert_call_function_op(self, node: torch._C.Node):
796
+ target = get_op_overload(node)
797
+
798
+ args, kwargs = self.get_args_kwargs(node, target._schema)
799
+
800
+ fx_node = self.fx_graph.call_function(target, args, kwargs)
801
+
802
+ # TODO: covnert sourceRange() into stack_trace
803
+ # fx_node.meta["stack_trace"] = node.sourceRange()
804
+
805
+ if node.outputsSize() == 1:
806
+ output_name = node.output().debugName()
807
+ self.name_to_node[output_name] = fx_node
808
+ else:
809
+ for i, outp in enumerate(node.outputs()):
810
+ output_name = outp.debugName()
811
+ next_fx_node = self.fx_graph.call_function(
812
+ operator.getitem, (fx_node, i)
813
+ )
814
+ self.name_to_node[output_name] = next_fx_node
815
+
816
+ def convert_prim_TupleConstruct(self, node: torch._C.Node):
817
+ self._convert_prim_iterator(node)
818
+
819
+ def convert_prim_ListConstruct(self, node: torch._C.Node):
820
+ self._convert_prim_iterator(node)
821
+
822
+ def _convert_prim_iterator(self, node: torch._C.Node):
823
+ output_list = []
824
+ for inp in node.inputs():
825
+ output_list.append(self.get_fx_value_by_ir_value(inp))
826
+
827
+ output_name = node.output().debugName()
828
+ self.name_to_node[output_name] = output_list
829
+
830
+ def convert_prim_DictConstruct(self, node: torch._C.Node):
831
+ output_dict = {}
832
+ k, v = None, None
833
+ for i, inp in enumerate(node.inputs()):
834
+ # We assume key value are stored in pair in the DictConstruct.
835
+ # The first element is the key and the following is the value.
836
+ if i % 2 == 0:
837
+ k = self.get_fx_value_by_ir_value(inp)
838
+ else:
839
+ v = self.get_fx_value_by_ir_value(inp)
840
+ assert (
841
+ k is not None and v is not None
842
+ ), "DictConstruct has an empty key value pair."
843
+ output_dict[k] = v
844
+ k, v = None, None
845
+
846
+ assert (
847
+ k is None and v is None
848
+ ), "DictConstruct has an odd number of elements (violating our assumption)."
849
+
850
+ output_name = node.output().debugName()
851
+ self.name_to_node[output_name] = output_dict
852
+
853
+ def convert_prim_ListUnpack(self, node: torch._C.Node):
854
+ self._convert_prim_unpack_iterator(node)
855
+
856
+ def convert_prim_TupleUnpack(self, node: torch._C.Node):
857
+ self._convert_prim_unpack_iterator(node)
858
+
859
+ def _convert_prim_unpack_iterator(self, node: torch._C.Node):
860
+ # Single input and multiple outputs for unpacking.
861
+ for i, outp in enumerate(node.outputs()):
862
+ outp_name = outp.debugName()
863
+ inp = self.get_fx_value_by_ir_value(node.input())
864
+ fx_node = self.fx_graph.call_function(operator.getitem, (inp, i))
865
+ self.name_to_node[outp_name] = fx_node
866
+
867
+ def convert_aten_Int(self, node: torch._C.Node):
868
+ # converts aten::Int as aten._to_copy + aten::_local_scalar_dense
869
+ target = torch.ops.aten._to_copy.default
870
+ args = tuple(self.get_fx_value_by_ir_value(input) for input in node.inputs())
871
+ to_copy_node = self.fx_graph.call_function(target, args, {"dtype": torch.int32})
872
+
873
+ fx_node = self.fx_graph.call_function(
874
+ torch.ops.aten._local_scalar_dense.default, (to_copy_node,)
875
+ )
876
+
877
+ # TODO: covnert sourceRange() into stack_trace
878
+ # fx_node.meta["stack_trace"] = node.sourceRange()
879
+
880
+ output_name = node.output().debugName()
881
+ self.name_to_node[output_name] = fx_node
882
+
883
+ def convert_prim_NumToTensor(self, node: torch._C.Node):
884
+ # Converts prim::NumToTensor as aten.scalar_tensor.
885
+ # prim::NumToTensor IRs are currently triggered by:
886
+ # .size() https://github.com/pytorch/pytorch/blob/main/torch/csrc/jit/frontend/tracer.cpp#L950
887
+ # .numel() https://github.com/pytorch/pytorch/blob/main/torch/csrc/jit/frontend/tracer.cpp#L971
888
+ # For both of those APIs, torch.jit.trace implicitly sets the output tensor type
889
+ # to be LongTensor.
890
+ target = torch.ops.aten.scalar_tensor
891
+ args = tuple(self.get_fx_value_by_ir_value(input) for input in node.inputs())
892
+
893
+ fx_node = self.fx_graph.call_function(target, args, {"dtype": torch.long})
894
+ output_name = node.output().debugName()
895
+ self.name_to_node[output_name] = fx_node
896
+
897
+ def convert_prim_CreateObject(self, node: torch._C.Node):
898
+ output_name = node.output().debugName()
899
+ self.name_to_attribute_fqn[output_name] = ""
900
+
901
+ def convert_aten__convolution(self, node: torch._C.Node):
902
+ # converts aten::_convolution as aten.convolution, since aten::_convolution
903
+ # doesn't have a meta function
904
+ target = torch.ops.aten.convolution.default
905
+ args, kwargs = self.get_args_kwargs(node, target._schema)
906
+
907
+ fx_node = self.fx_graph.call_function(target, args, kwargs)
908
+
909
+ output_name = node.output().debugName()
910
+ self.name_to_node[output_name] = fx_node
911
+
912
+ def convert_aten_div(self, node: torch._C.Node):
913
+ target = get_op_overload(node)
914
+ schema = target._schema
915
+
916
+ args, kwargs = self.get_args_kwargs(node, schema)
917
+
918
+ # converts aten::div.Tensor_mode(x, tensor_constant)
919
+ # as aten.div.Scalar_mode(x, tensor_constant.item())
920
+ if schema.overload_name == "Tensor_mode":
921
+ arg1_name = args[1].name
922
+ if arg1_name in self.name_to_constant and isinstance(
923
+ self.name_to_constant[arg1_name], torch.Tensor
924
+ ):
925
+ tensor_constant = self.name_to_constant[arg1_name]
926
+ if tensor_constant.numel() == 1:
927
+ updated_args = list(args)
928
+ updated_args[1] = self.name_to_constant[arg1_name].item()
929
+
930
+ fx_node = self.fx_graph.call_function(
931
+ torch.ops.aten.div.Scalar_mode,
932
+ tuple(updated_args),
933
+ kwargs,
934
+ )
935
+
936
+ # TODO: covnert sourceRange() into stack_trace
937
+ # fx_node.meta["stack_trace"] = node.sourceRange()
938
+
939
+ output_name = node.output().debugName()
940
+ self.name_to_node[output_name] = fx_node
941
+ return
942
+
943
+ self.convert_call_function_op(node)
944
+
945
+ def convert_aten___getitem__(self, node: torch._C.Node):
946
+ input_container, index = tuple(
947
+ self.get_fx_value_by_ir_value(input) for input in node.inputs()
948
+ )
949
+ fx_node = self.fx_graph.call_function(
950
+ operator.getitem, (input_container, index)
951
+ )
952
+ output_name = node.output().debugName()
953
+ self.name_to_node[output_name] = fx_node
954
+
955
+ def convert_aten_to(self, node: torch._C.Node):
956
+ target = get_op_overload(node)
957
+ args, kwargs = self.get_args_kwargs(node, target._schema)
958
+
959
+ # special handle aten.to.dtype and aten.to.prim_dtype followed by inplace_mutation_op
960
+ # coz aten.to + inplace_mutation_op pattern would trigger
961
+ # "cannot mutate tensors with frozen storage" functionalization error.
962
+ # To work around the issue, we override the copy to be True, so that the output
963
+ # is for sure not an alias of input
964
+ if target == torch.ops.aten.to.dtype or target == torch.ops.aten.to.prim_dtype:
965
+ user_nodes = [use.user for use in node.output().uses()]
966
+ user_targets = [
967
+ get_op_overload(user_node)
968
+ for user_node in user_nodes
969
+ if user_node.schema() != "(no schema)"
970
+ ]
971
+ has_mutable_target = any(
972
+ target._schema.is_mutable for target in user_targets
973
+ )
974
+
975
+ if has_mutable_target:
976
+ assert len(args) >= 4
977
+ new_args = list(args)
978
+ new_args[3] = True # copy, override to True
979
+ fx_node = self.fx_graph.call_function(
980
+ torch.ops.aten.to.dtype, tuple(new_args)
981
+ )
982
+ # temp hack to work around the issue https://github.com/pytorch/pytorch/issues/131679
983
+ # When this issue is fixed, the clone node would be no longer needed
984
+ clone_node = self.fx_graph.call_function(
985
+ torch.ops.aten.clone.default, (fx_node,)
986
+ )
987
+ output_name = node.output().debugName()
988
+ self.name_to_node[output_name] = clone_node
989
+ return
990
+
991
+ self.convert_call_function_op(node)
992
+
993
+ def convert_aten_add(self, node: torch._C.Node):
994
+ if node.schema() == "(no schema)":
995
+ if isinstance(node.inputsAt(0).type(), torch.ListType) and isinstance(
996
+ node.inputsAt(1).type(), torch.ListType
997
+ ):
998
+ target = torch.ops.aten.add.t
999
+ else:
1000
+ raise RuntimeError(f"unable to determind the target for {node}")
1001
+ else:
1002
+ target = get_op_overload(node)
1003
+
1004
+ if target == torch.ops.aten.add.t:
1005
+ # special handle python list/tuple add: "aten::add.t(t[] a, t[] b) -> t[]" for
1006
+ # RuntimeError: aten::add() Expected a value of type 'List[t]' for argument 'a' but instead found type 'immutable_list'.
1007
+ args, kwargs = self.get_args_kwargs(node, target._schema)
1008
+ output_name = node.output().debugName()
1009
+ self.name_to_node[output_name] = self.fx_graph.call_function(list_add, args)
1010
+ else:
1011
+ self.convert_call_function_op(node)
1012
+
1013
+ def _check_prim_loop_support(self, node):
1014
+ inputs = list(node.inputs())
1015
+
1016
+ # TODO: (1/N) stage.
1017
+ if inputs[0].debugName() not in self.name_to_constant:
1018
+ raise RuntimeError(
1019
+ "prim::Loop currently cannot run with dynamic value of number of iterations."
1020
+ )
1021
+
1022
+ # Make sure the condition is not updated in the subblock.
1023
+ subblock = next(node.blocks())
1024
+ condition_output_name = next(subblock.outputs()).debugName()
1025
+ for node in subblock.nodes():
1026
+ if (
1027
+ node.outputsSize() == 1
1028
+ and node.output().debugName() == condition_output_name
1029
+ ):
1030
+ raise RuntimeError(
1031
+ "prim::Loop currently cannot run with dynamic value of condition."
1032
+ )
1033
+ if node.outputsSize() >= 2:
1034
+ for outp in node.outputs():
1035
+ if outp.debugName() == condition_output_name:
1036
+ raise RuntimeError(
1037
+ "prim::Loop currently cannot run with dynamic value of condition."
1038
+ )
1039
+
1040
+ def convert_prim_Loop(self, node: torch._C.Node):
1041
+ inputs = list(node.inputs())
1042
+ self._check_prim_loop_support(node)
1043
+
1044
+ num_iterations = self.get_fx_value_by_ir_value(inputs[0])
1045
+
1046
+ # Find inputs.
1047
+ loop_local_arguments = [inp.debugName() for inp in inputs[2:]]
1048
+
1049
+ global_arguments = self._identify_inputs_as_arguments(node)
1050
+
1051
+ # Lift parameters as inputs.
1052
+ for block in node.blocks():
1053
+ global_arguments = global_arguments.union(
1054
+ self.blocks_to_lifted_attrs[block]
1055
+ )
1056
+
1057
+ global_arguments = list(global_arguments)
1058
+
1059
+ subgraph_nodes, subgraph_converters = self._convert_block_to_subgraph(
1060
+ node, global_arguments
1061
+ )
1062
+
1063
+ assert len(subgraph_nodes) == 1
1064
+ subgraph_converter = subgraph_converters[0]
1065
+ if not self.is_top_level_graph():
1066
+ self.name_update_from_subblock_to_parent = (
1067
+ self.name_update_from_subblock_to_parent.union(
1068
+ subgraph_converter.name_update_from_subblock_to_parent
1069
+ )
1070
+ )
1071
+
1072
+ fx_block_args = [
1073
+ self.get_fx_value_by_fqn(name)
1074
+ for name in loop_local_arguments + global_arguments
1075
+ ]
1076
+ for iter_idx in range(num_iterations):
1077
+ loop_node = self.fx_graph.call_function(
1078
+ execute_subgraph_from_prim_loop,
1079
+ # Check execute_node function for the expected arguments order.
1080
+ (
1081
+ subgraph_nodes[0],
1082
+ iter_idx,
1083
+ len(loop_local_arguments),
1084
+ *fx_block_args,
1085
+ ),
1086
+ {},
1087
+ )
1088
+
1089
+ # Update the value of loop local variables.
1090
+ if node.outputsSize() >= 1:
1091
+ for i, outp in enumerate(node.outputs()):
1092
+ output_name = outp.debugName()
1093
+ self.name_to_node[output_name] = self.fx_graph.call_function(
1094
+ operator.getitem,
1095
+ (
1096
+ loop_node,
1097
+ i + 1,
1098
+ ), # + 1 because the 0th element is the condition.
1099
+ )
1100
+ fx_block_args[i] = self.name_to_node[output_name]
1101
+
1102
+ # Update the value of global variables, whose values are modified inplace.
1103
+ for i, name in enumerate(
1104
+ subgraph_converter.name_update_from_subblock_to_parent
1105
+ ):
1106
+ self.name_to_node[name] = self.fx_graph.call_function(
1107
+ operator.getitem,
1108
+ (
1109
+ loop_node,
1110
+ i + node.outputsSize() + 1,
1111
+ ), # + 1 because the 0th element is the condition.
1112
+ )
1113
+ global_argument_index = global_arguments.index(name)
1114
+ fx_block_args[
1115
+ i + node.outputsSize() + global_argument_index
1116
+ ] = self.name_to_node[name]
1117
+
1118
+ def _check_set_attr_in_if_block(self, if_node: torch._C.Node):
1119
+ for block in if_node.blocks():
1120
+ for node in block.nodes():
1121
+ if node.kind() == "prim::SetAttr":
1122
+ raise RuntimeError(
1123
+ "During converting prim::If to torch.cond, found prim::SetAttr op"
1124
+ " which is not supported yet. Please file an issue if you come "
1125
+ "across this error."
1126
+ )
1127
+
1128
+ def convert_prim_If(self, node: torch._C.Node):
1129
+ self._check_set_attr_in_if_block(node)
1130
+
1131
+ inputs = list(node.inputs())
1132
+ assert len(inputs) == 1
1133
+ predicate = self.get_fx_value_by_ir_value(inputs[0])
1134
+
1135
+ # Find inputs.
1136
+ arguments = self._identify_inputs_as_arguments(node)
1137
+
1138
+ # Lift parameters as inputs.
1139
+ for block in node.blocks():
1140
+ arguments = arguments.union(self.blocks_to_lifted_attrs[block])
1141
+
1142
+ arguments = list(arguments)
1143
+ subgraph_nodes, _ = self._convert_block_to_subgraph(node, arguments)
1144
+
1145
+ assert len(subgraph_nodes) == 2
1146
+
1147
+ fx_block_args = [self.get_fx_value_by_fqn(name) for name in arguments]
1148
+
1149
+ args = (
1150
+ predicate,
1151
+ subgraph_nodes[0],
1152
+ subgraph_nodes[1],
1153
+ tuple(fx_block_args),
1154
+ )
1155
+
1156
+ cond_node = self.fx_graph.call_function(torch.cond, args, {})
1157
+
1158
+ # prim::If can also have zero output.
1159
+ if node.outputsSize() == 1:
1160
+ output_name = node.output().debugName()
1161
+ self.name_to_node[output_name] = cond_node
1162
+ elif node.outputsSize() > 1:
1163
+ for i, output in enumerate(node.outputs()):
1164
+ output_name = output.debugName()
1165
+ getitem = self.fx_graph.call_function(operator.getitem, (cond_node, i))
1166
+ self.name_to_node[output_name] = getitem
1167
+
1168
+ def convert_aten_Bool(self, node: torch._C.Node):
1169
+ self._convert_as_noop(node)
1170
+
1171
+ def convert_prim_Enter(self, node: torch._C.Node):
1172
+ # export generally treats prim::Enter as noop
1173
+ # The only context manager export supports is aten::enable_grad.
1174
+ # Unfortunately, TorchScript does not support aten::enable_grad yet.
1175
+ # TODO: support aten::enable_grad in both TorchScript and Converter.
1176
+ return
1177
+
1178
+ def convert_prim_Exit(self, node: torch._C.Node):
1179
+ # export treats prim::Exit as noop
1180
+ return
1181
+
1182
+ def _convert_as_noop(self, node: torch._C.Node):
1183
+ # Converts the node as a no-op by mapping its output node as arg[0]
1184
+
1185
+ target = get_op_overload(node)
1186
+ schema = target._schema
1187
+
1188
+ args, kwargs = self.get_args_kwargs(node, schema)
1189
+
1190
+ output_name = node.output().debugName()
1191
+ self.name_to_node[output_name] = args[0]
1192
+
1193
+ def convert_profiler__record_function_exit(self, node: torch._C.Node):
1194
+ # _record_function_exit has side effect so we keep it in fx.graph
1195
+ # currently, _record_function_enter_new and _record_function_exit are
1196
+ # discarded during `retrace_as_exported_program`.
1197
+ target = torch.ops.profiler._record_function_exit
1198
+ args = tuple(self.get_fx_value_by_ir_value(input) for input in node.inputs())
1199
+ self.fx_graph.call_function(target, args)
1200
+
1201
+ def convert_prim_tolist(self, node: torch._C.Node):
1202
+ # prim::tolist cannot be supported by `_convert_standard_operators`
1203
+ # since it requires call_method instead of call_function.
1204
+ target = "tolist"
1205
+ args = (self.get_fx_value_by_ir_value(next(node.inputs())),)
1206
+ fx_node = self.fx_graph.call_method(target, args)
1207
+ output_name = node.output().debugName()
1208
+ self.name_to_node[output_name] = fx_node
1209
+
1210
+ def convert_prim_Uninitialized(self, node: torch._C.Node):
1211
+ # `prim::Uninitialized` is inserted by the compiler when it can prove
1212
+ # the value will never be used. It can be introduced by exceptions,
1213
+ # breaks, continues, and returns.
1214
+ # So we add a dummy constant to the graph.
1215
+ output_name = node.output().debugName()
1216
+ self.name_to_constant[output_name] = torch.Tensor()
1217
+
1218
+ def _convert_standard_operators(self, node: torch._C.Node):
1219
+ target = kind_to_standard_operators[node.kind()]
1220
+ args = tuple(self.get_fx_value_by_ir_value(input) for input in node.inputs())
1221
+ fx_node = self.fx_graph.call_function(target, args)
1222
+ output_name = node.output().debugName()
1223
+ self.name_to_node[output_name] = fx_node
1224
+
1225
+ def convert_node(self, node: torch._C.Node):
1226
+ node_kind = node.kind()
1227
+
1228
+ # Get handler based on namespace and operator name.
1229
+ # Provide a default node handler as well in case we don't find
1230
+ # matching converter for that.
1231
+ handler_func_name = ir_name_to_func_name(node_kind)
1232
+ handler_func = getattr(self, handler_func_name, self.convert_call_function_op)
1233
+
1234
+ # str calls print function implemented in CPP. To avoid repeating
1235
+ # the entire logic here, we simply keep first line from node string (getting rid
1236
+ # of sub-blocks IR prints).
1237
+ node_str = "".join(str(node).split("\n")[:1])
1238
+ log.debug("[%s] converts [%s]", handler_func.__name__, node_str)
1239
+ try:
1240
+ handler_func(node)
1241
+ except Exception as e:
1242
+ raise RuntimeError(f"TS2EPConverter failed for node {node_kind}") from e
1243
+
1244
+ def convert_graph_outputs(self):
1245
+ args = []
1246
+ outp_name_list = [outp.debugName() for outp in self.ts_graph.outputs()] + list(
1247
+ self.name_update_from_subblock_to_parent
1248
+ )
1249
+ for output_name in outp_name_list:
1250
+ if output_name in self.name_to_node:
1251
+ fx_node = self.name_to_node[output_name]
1252
+ # TODO: Revisit this later after HigherOrderOp design changes.
1253
+ # Currently, we cannot directly return input as output.
1254
+ if (
1255
+ not self.is_top_level_graph()
1256
+ and isinstance(fx_node, torch.fx.Node)
1257
+ and fx_node.op == "placeholder"
1258
+ ):
1259
+ fx_node = self.fx_graph.call_function(torch.clone, (fx_node,))
1260
+ args.append(fx_node)
1261
+ self.output_specs.append(
1262
+ OutputSpec(
1263
+ OutputKind.USER_OUTPUT,
1264
+ arg=TensorArgument(name=output_name),
1265
+ target=output_name,
1266
+ )
1267
+ )
1268
+ elif output_name in self.name_to_constant:
1269
+ args.append(self.name_to_constant[output_name])
1270
+ self.output_specs.append(
1271
+ OutputSpec(
1272
+ OutputKind.USER_OUTPUT,
1273
+ arg=ConstantArgument(
1274
+ name=output_name, value=self.name_to_constant[output_name]
1275
+ ),
1276
+ target=output_name,
1277
+ )
1278
+ )
1279
+ else:
1280
+ raise ValueError(f"Output {output_name} not found")
1281
+
1282
+ if len(args) == 0:
1283
+ # Sub-block of prim::If can have zero output.
1284
+ self.fx_graph.output([])
1285
+ elif len(args) == 1:
1286
+ self.fx_graph.output(
1287
+ args[0]
1288
+ ) # Get rid of an extra list wrapped around final output.
1289
+ elif len(args) > 1:
1290
+ self.fx_graph.output(
1291
+ args
1292
+ ) # For prim::Loop and prim::If with multiple outputs.
1293
+ else:
1294
+ # Sub-block of prim::Loop can have multiple outputs.
1295
+ self.fx_graph.output(args)
1296
+
1297
+
1298
+ class ExplainTS2FXGraphConverter(TS2FXGraphConverter):
1299
+ """
1300
+ Run TS2FXGraphConverter in an explain mode. It collects all failed operators conversions
1301
+ and provide that information to users. In order to collect all failed conversions, it
1302
+ also mocks some internal attributes (e.g., name_to_node).
1303
+ """
1304
+
1305
+ class _DictMock(dict):
1306
+ def __init__(self, dict_data, mock_value):
1307
+ super().__init__(dict_data)
1308
+ self.mock_value = mock_value
1309
+
1310
+ def __getitem__(self, key):
1311
+ # If the original dictionary has the key, return its value.
1312
+ # Otherwise, return the mock value.
1313
+ if not super().__contains__(key):
1314
+ return self.mock_value
1315
+ return super().__getitem__(key)
1316
+
1317
+ def __contains__(self, key):
1318
+ return True
1319
+
1320
+ def __init__(
1321
+ self,
1322
+ ts_graph: Union[torch._C.Graph, torch._C.Block],
1323
+ name_to_param: Dict[str, torch.Tensor],
1324
+ name_to_buffer: Dict[str, torch.Tensor],
1325
+ blocks_to_lifted_attrs: Dict[torch._C.Block, Set[str]],
1326
+ name_to_non_tensor_attribute: Dict[str, Any],
1327
+ name_to_constant: Dict[str, Any],
1328
+ ):
1329
+ super().__init__(
1330
+ ts_graph,
1331
+ name_to_param,
1332
+ name_to_buffer,
1333
+ blocks_to_lifted_attrs,
1334
+ name_to_non_tensor_attribute,
1335
+ name_to_constant,
1336
+ )
1337
+
1338
+ # Data to keep track of unsupported nodes.
1339
+ self.unsupported_node_list: List[torch._C.Node] = []
1340
+
1341
+ # Add mock to needed attributes.
1342
+ self.name_to_node = ExplainTS2FXGraphConverter._DictMock(
1343
+ self.name_to_node,
1344
+ # Dummy node.
1345
+ torch.fx.Node(
1346
+ None, # type: ignore[arg-type]
1347
+ "mock",
1348
+ "call_function",
1349
+ lambda: None,
1350
+ (),
1351
+ {},
1352
+ ),
1353
+ )
1354
+
1355
+ def explain(self):
1356
+ self.convert_graph_inputs()
1357
+ for node in self.ts_graph.nodes():
1358
+ self.convert_node(node)
1359
+ self.convert_graph_outputs()
1360
+
1361
+ def convert_node(self, node):
1362
+ try:
1363
+ super().convert_node(node)
1364
+ except Exception as e:
1365
+ self.unsupported_node_list.append(node)
1366
+
1367
+
1368
+ @contextmanager
1369
+ def disable_logging(log):
1370
+ disabled = log.disabled
1371
+ log.disabled = True
1372
+ try:
1373
+ yield
1374
+ finally:
1375
+ log.disabled = disabled
1376
+
1377
+
1378
+ class TS2EPConverter:
1379
+ # TorchScript model to ExportedProgram converter
1380
+ def __init__(
1381
+ self,
1382
+ ts_model: Union[torch.jit.ScriptModule, torch.jit.ScriptFunction],
1383
+ sample_args: Tuple[Any, ...],
1384
+ sample_kwargs: Optional[Dict[str, Any]] = None,
1385
+ ):
1386
+ self.ts_model = ts_model
1387
+ self.ts_graph, self.params, _, _ = _create_jit_graph(ts_model, sample_args)
1388
+
1389
+ self.sample_args = sample_args
1390
+ self.sample_kwargs = sample_kwargs
1391
+
1392
+ self.name_to_param: Dict[str, torch.Tensor] = {}
1393
+ self.name_to_buffer: Dict[str, torch.Tensor] = {}
1394
+ param_list = (
1395
+ list(self.ts_model.parameters())
1396
+ if not isinstance(self.ts_model, torch._C.ScriptFunction)
1397
+ else []
1398
+ )
1399
+ if not isinstance(self.ts_model, torch._C.ScriptFunction):
1400
+ for k, tensor in self.ts_model.state_dict().items(): # type: ignore[union-attr]
1401
+ # Check if tensor belongs to any parameter.
1402
+ if any(
1403
+ (tensor == param).all()
1404
+ for param in param_list
1405
+ if tensor.shape == param.shape
1406
+ ):
1407
+ self.name_to_param[k] = tensor
1408
+ else:
1409
+ self.name_to_buffer[k] = tensor
1410
+
1411
+ self.name_to_non_tensor_attributes: Dict[str, Any] = {}
1412
+ self.name_to_constant: Dict[str, Any] = {}
1413
+
1414
+ self.lift_get_attr()
1415
+
1416
+ def convert(self) -> ExportedProgram:
1417
+ log.info(
1418
+ """
1419
+ TS2EPConverter logging starts from here.
1420
+
1421
+ INFO: (TORCH_LOGS="export" <cmd>)
1422
+ * Log TorchScript IR.
1423
+
1424
+ DEBUG: (TORCH_LOGS="+export" <cmd>), additionally
1425
+ * Log conversion IR by IR in a format of [<conversion handler name>] converts [<IR>].
1426
+ """
1427
+ )
1428
+ log.info("TorchScript graph\n\n%s\n", self.ts_graph)
1429
+
1430
+ blocks_to_lifted_attrs = get_block_to_lifted_attrs(self.ts_graph)
1431
+
1432
+ graph_converter = TS2FXGraphConverter(
1433
+ self.ts_graph,
1434
+ self.name_to_param,
1435
+ self.name_to_buffer,
1436
+ blocks_to_lifted_attrs,
1437
+ self.name_to_non_tensor_attributes,
1438
+ self.name_to_constant,
1439
+ )
1440
+ gm = graph_converter.convert()
1441
+
1442
+ # Post-proccessing step to deal with quantized operators.
1443
+ replace_quantized_ops_with_standard_ops(gm)
1444
+ log.info("GraphModule: %s", gm.print_readable(print_output=False))
1445
+
1446
+ ep = self.retrace_as_exported_program(
1447
+ gm,
1448
+ graph_converter.name_to_constant,
1449
+ )
1450
+ log.info("%s", ep)
1451
+
1452
+ # Post-processing step to ensure ExportedProgram has the same state_dict as
1453
+ # the original TorchScript model. Throw warnings for additionally populated
1454
+ # state_dict entries.
1455
+ if not isinstance(self.ts_model, torch._C.ScriptFunction):
1456
+ for k, tensor in self.ts_model.state_dict().items(): # type: ignore[union-attr]
1457
+ if k not in ep.state_dict:
1458
+ warnings.warn(
1459
+ f"Manually populate {k} into state_dict ExportedProgram, but it is never used by the ExportedProgram."
1460
+ )
1461
+ ep.state_dict[k] = tensor
1462
+
1463
+ return ep
1464
+
1465
+ @disable_logging(log)
1466
+ def explain(self, print_output=True):
1467
+ blocks_to_lifted_attrs = get_block_to_lifted_attrs(self.ts_graph)
1468
+
1469
+ graph_converter = ExplainTS2FXGraphConverter(
1470
+ self.ts_graph,
1471
+ self.name_to_param,
1472
+ self.name_to_buffer,
1473
+ blocks_to_lifted_attrs,
1474
+ self.name_to_non_tensor_attributes,
1475
+ self.name_to_constant,
1476
+ )
1477
+ graph_converter.explain()
1478
+ if len(graph_converter.unsupported_node_list) > 0:
1479
+ explain_str = "Unsupported nodes are found in the following list:"
1480
+ for i, n in enumerate(graph_converter.unsupported_node_list):
1481
+ node_str = "".join(str(n).split("\n")[:1])
1482
+ explain_str += f"\n\n {i}. {n.kind()} [{node_str}]"
1483
+ else:
1484
+ explain_str = "Success!"
1485
+ if print_output:
1486
+ print(explain_str)
1487
+ return explain_str
1488
+
1489
    def retrace_as_exported_program(
        self,
        gm: torch.fx.GraphModule,
        name_to_constant: Dict[str, Any],
    ):
        """Retrace ``gm`` with ``torch.export`` and repair constant handling.

        During conversion tensor constants were modeled as GetAttr nodes, so
        retracing mistakes them for buffers. This removes them from the state
        dict, registers them as constants, and rewrites their input specs to
        ``CONSTANT_TENSOR`` before re-verifying the program.
        """
        # TODO: adjust input orders to match GraphSignature convention
        ep = torch.export._trace._export(
            gm,
            self.sample_args,
            strict=False,
            pre_dispatch=True,
        )

        # Post-processing to make sure the ExportedProgram states are correct.
        # Because during conversion, we set tensor constants as GetAttr,
        # retracing cannot recognize them as tensor constants but instead
        # treat them as buffers. We need to set them again here.
        ep._constants.update(
            {
                k: v
                for k, v in name_to_constant.items()
                if isinstance(v, (torch.Tensor, torch.ScriptObject))
            }
        )
        # Drop the erroneously-created buffer entries from the state dict.
        for k in name_to_constant:
            ep.state_dict.pop(k, None)

        for i, spec in enumerate(ep.graph_signature.input_specs):
            # Mark as constant tensors for erroneously traced buffers.
            if spec.kind == InputKind.BUFFER and spec.target in name_to_constant:
                assert isinstance(
                    name_to_constant[spec.target], torch.Tensor
                ), f"{type(name_to_constant[spec.target])} has been erroneously marked as buffer"
                spec.kind = InputKind.CONSTANT_TENSOR
        # The signature was edited in place above, so re-check validity.
        ep.verifier().check(ep)

        return ep
1526
+
1527
    def lift_get_attr(self):
        """Walk the TorchScript graph and lift ``prim::GetAttr`` targets.

        Populates ``self.name_to_buffer``, ``self.name_to_constant`` and
        ``self.name_to_non_tensor_attributes`` from attributes reachable via
        GetAttr nodes (including nested sub-blocks).
        """
        # This function lifts multiple data types.

        # 1. Tensor constants attributes (e.g., self.data = torch.tensor([2,3]))
        # to buffers. Currently, when there are tensor constants, export
        # would error and ask users to register tensor constants as buffers.
        # Since it is hard to manually do so for TorchScript models
        # (e.g., source code is missing), this function automatically
        # lifts tensor constants to be buffers.

        # 2. ScriptObject to constant. It will then be converted to getattr
        # in the fx graph.
        #
        # This function should happen in TS2EPConverter instead of
        # TS2FXGraphConverter since it gets attributes from self.ts_model
        # which is not accessible in TS2FXGraphConverter. It is similar to where
        # we collect self.name_to_param and self.name_to_buffer.

        # Maps a graph value's debug name to the fully-qualified attribute
        # name it refers to, so chained GetAttrs resolve correctly.
        name_to_attribute_fqn: Dict[str, str] = {}

        def get_attr(fqn: str):
            # Resolve a dotted fqn against the actual ScriptModule.
            name = fqn.split(".")
            v = self.ts_model
            for n in name:
                v = getattr(v, n)
            return v

        def get_fqn(node: torch._C.Node):
            # Build the fqn of this GetAttr from its input's resolved prefix.
            attr_name = node.s("name")
            input_name = node.input().debugName()
            root_attr_name = name_to_attribute_fqn[input_name]
            attr_fqn = f"{root_attr_name}.{attr_name}" if root_attr_name else attr_name
            return attr_fqn

        def _dfs_get_attr(block):
            for node in block.nodes():
                if node.kind() == "prim::CreateObject":
                    # A freshly created object roots an empty attribute path.
                    output_name = node.output().debugName()
                    name_to_attribute_fqn[output_name] = ""

                if node.kind() == "prim::GetAttr":
                    attr_fqn = get_fqn(node)
                    value = get_attr(attr_fqn)
                    output_name = node.output().debugName()
                    name_to_attribute_fqn[output_name] = attr_fqn
                    if isinstance(value, torch.Tensor):
                        if attr_fqn not in self.name_to_buffer:
                            # Lift tensor constants to be a buffer
                            self.name_to_buffer[attr_fqn] = value
                    elif isinstance(value, torch.ScriptObject):
                        if attr_fqn not in self.name_to_constant:
                            self.name_to_constant[attr_fqn] = value
                    else:
                        self.name_to_non_tensor_attributes[attr_fqn] = value

                # Recurse into control-flow sub-blocks.
                for subblock in node.blocks():
                    _dfs_get_attr(subblock)

        _dfs_get_attr(self.ts_graph)
.venv/lib/python3.11/site-packages/torch/_export/non_strict_utils.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import contextlib
3
+ import inspect
4
+ import logging
5
+ from collections import defaultdict
6
+ from typing import Any, Callable, Dict, List, Tuple, TYPE_CHECKING, Union
7
+
8
+ import torch
9
+ import torch.utils._pytree as pytree
10
+ from torch._dynamo.source import (
11
+ AttrSource,
12
+ GetItemSource,
13
+ LocalSource,
14
+ TensorProperty,
15
+ TensorPropertySource,
16
+ )
17
+ from torch._dynamo.variables.builder import TrackedFake
18
+ from torch._export.passes.add_runtime_assertions_for_constraints_pass import InputDim
19
+ from torch._export.passes.lift_constants_pass import ConstantAttrMap
20
+ from torch._guards import Source
21
+ from torch._library.fake_class_registry import FakeScriptObject
22
+ from torch._subclasses.fake_tensor import FakeTensorMode
23
+ from torch.export import Constraint
24
+ from torch.export.dynamic_shapes import (
25
+ _check_dynamic_shapes,
26
+ _combine_args,
27
+ _DimHint,
28
+ _process_dynamic_shapes,
29
+ _transform_shapes_for_default_dynamic,
30
+ _tree_map_with_path,
31
+ )
32
+ from torch.export.graph_signature import CustomObjArgument
33
+ from torch.fx.experimental import _config as config
34
+ from torch.fx.experimental.symbolic_shapes import (
35
+ _find_user_code_frame,
36
+ _suggest_fixes_for_data_dependent_error_non_strict,
37
+ ConstraintViolationError,
38
+ DimDynamic,
39
+ EqualityConstraint,
40
+ GuardOnDataDependentSymNode,
41
+ ShapeEnv,
42
+ StatelessSymbolicContext,
43
+ ValueRanges,
44
+ )
45
+ from torch.utils._pytree import (
46
+ GetAttrKey,
47
+ KeyPath,
48
+ MappingKey,
49
+ SequenceKey,
50
+ tree_map_with_path,
51
+ )
52
+
53
+
54
+ if TYPE_CHECKING:
55
+ from sympy import Symbol
56
+
57
+
58
+ log = logging.getLogger(__name__)
59
+
60
+
61
+ def key_path_to_source(kp: KeyPath) -> Source:
62
+ """
63
+ Given a key path, return the source for the key path.
64
+ """
65
+ source: Source = LocalSource("args")
66
+ for k in kp:
67
+ if isinstance(k, SequenceKey):
68
+ source = GetItemSource(source, k.idx)
69
+ elif isinstance(k, MappingKey):
70
+ source = GetItemSource(source, k.key)
71
+ elif isinstance(k, GetAttrKey):
72
+ source = AttrSource(source, k.name)
73
+ else:
74
+ raise ValueError(f"Unknown KeyEntry {k}")
75
+
76
+ return source
77
+
78
+
79
+ def _is_constant_argument(t):
80
+ return t is None or isinstance(t, (int, float, bool, str))
81
+
82
+
83
def fakify(
    mode: FakeTensorMode,
    kp: KeyPath,
    t: Any,
    t_constraints: Dict[int, Dict[int, Constraint]],
    sources: Dict[Tuple[int, int], List[Source]],
):
    """Fakify one user input for non-strict export tracing.

    Constants and ScriptObjects pass through unchanged. Tensors become
    FakeTensors with every dimension dynamic; user constraints for this
    tensor (looked up by ``id(t)``) are attached per dimension. ``sources``
    is updated in place with the Source of each constrained dimension,
    keyed by ``(tensor id, dim)``.

    Raises:
        ValueError: for input types other than constants/ScriptObjects/tensors.
    """
    source = key_path_to_source(kp)
    if _is_constant_argument(t) or isinstance(t, torch.ScriptObject):
        return t

    if not isinstance(t, torch.Tensor):
        raise ValueError(f"Unsupported input type {type(t)}")
    n_dims = len(t.shape)
    # Start fully dynamic; per-dim constraints are filled in below.
    symbolic_context = StatelessSymbolicContext(
        dynamic_sizes=[DimDynamic.DYNAMIC] * n_dims,
        constraint_sizes=[None] * n_dims,
    )
    t_id = id(t)
    assert mode.shape_env is not None
    if t_id in t_constraints:
        for i, constraint in t_constraints[t_id].items():
            symbolic_context.constraint_sizes[i] = constraint.constraint_range
            src = TensorPropertySource(base=source, prop=TensorProperty.SIZE, idx=i)
            sources[(t_id, i)].append(src)
            # Map the internal source name to the user-facing Dim name so
            # error messages refer to the user's names.
            mode.shape_env.source_name_to_debug_name[src.name()] = constraint.name  # type: ignore[assignment]
    fake = mode.from_tensor(t, source=source, symbolic_context=symbolic_context)
    # Record the fake so guards can be produced later
    # (see produce_guards_and_solve_constraints).
    mode.shape_env.tracked_fakes.append(TrackedFake(fake, source, symbolic_context))  # type: ignore[union-attr]
    return fake
112
+
113
+
114
def make_fake_inputs(
    nn_module,
    args,
    kwargs,
    dynamic_shapes,
    _is_torch_jit_trace=False,
    allow_complex_guards_as_runtime_asserts=False,
):
    """
    Given an nn module, example inputs, and constraints, return a new fake mode,
    fake inputs created in that mode whose dynamic shape dimensions are constrained
    by the given ranges, and sources for pairs of dynamic shape dimensions that are
    constrained to be equal.

    Returns a 6-tuple: (fake_mode, fake_args, fake_kwargs, equalities_inputs,
    original_signature, transformed_dynamic_shapes).
    """
    # TODO(avik): refactor Dynamo to avoid duplication of the following code
    # between non-strict and strict.
    # Specifically, here (non-strict) we do the following pre-tracing steps:
    #   - Fakify inputs.
    #   - Process input shape equalities.
    # In strict, these steps are spread across multiple files:
    #   - output_graph.py fakifies inputs.
    #   - [post-tracing] guards.py processes input shape equalities.

    combined_args = _combine_args(nn_module, args, kwargs)
    _check_dynamic_shapes(combined_args, dynamic_shapes)
    transformed_dynamic_shapes = _transform_shapes_for_default_dynamic(
        combined_args, dynamic_shapes
    )
    constraints = _process_dynamic_shapes(combined_args, transformed_dynamic_shapes)
    # Index constraints by (tensor id, dim) for O(1) lookup in fakify().
    t_constraints: Dict[int, Dict[int, Constraint]] = defaultdict(dict)
    for constraint in constraints:
        t_constraints[constraint.t_id][constraint.dim] = constraint

    context = torch._guards.TracingContext.try_get()
    if context is not None:
        # This occurs when we are exporting within dynamo. There already exists
        # a toplevel TracingContext with a fake mode, so we do not want to
        # create another fake mode.
        fake_mode = context.fake_mode
    elif not _is_torch_jit_trace:
        # Carry the forward()'s code location into the ShapeEnv for diagnostics.
        code = nn_module.forward.__code__
        co_fields = {
            "co_name": code.co_name,
            "co_filename": code.co_filename,
            "co_firstlineno": code.co_firstlineno,
        }
        fake_mode = FakeTensorMode(
            shape_env=ShapeEnv(
                tracked_fakes=[],
                co_fields=co_fields,
                prefer_deferred_runtime_asserts_over_guards=True,
                allow_complex_guards_as_runtime_asserts=allow_complex_guards_as_runtime_asserts,
            ),
            allow_non_fake_inputs=True,
            export=True,
        )
    else:
        # jit-traced modules have no usable __code__; skip co_fields.
        fake_mode = FakeTensorMode(
            shape_env=ShapeEnv(
                tracked_fakes=[],
                prefer_deferred_runtime_asserts_over_guards=True,
                allow_complex_guards_as_runtime_asserts=allow_complex_guards_as_runtime_asserts,
            ),
            allow_non_fake_inputs=True,
        )
    if fake_mode.shape_env is None or fake_mode.shape_env.tracked_fakes is None:
        raise ValueError(
            "Detected fake_mode does not have a shape_env with tracked fakes. "
            "If you constructed the module under a FakeTensorMode, "
            "please initialize it like: FakeTensorMode(shape_env=ShapeEnv(tracked_fakes=[]))"
        )

    with fake_mode:
        # FIXME(ycao) ScriptMethod doesn't have signature, I am using an empty one to unblock
        if not _is_torch_jit_trace:
            original_signature = inspect.signature(nn_module.forward)
        else:
            original_signature = None
        sources: Dict[Tuple[int, int], List[Source]] = defaultdict(list)
        fake_args, fake_kwargs = tree_map_with_path(
            lambda kp, val: fakify(fake_mode, kp, val, t_constraints, sources),
            (args, kwargs),
        )

        names: Dict[str, Tuple[int, int]] = {}
        source_pairs: List[Tuple[Source, Source]] = []
        derived_equalities: List[Tuple[Source, Union[Source, Symbol], Callable]] = []
        phantom_symbols: Dict[str, Symbol] = {}
        # Turn each user constraint into equality relations between sources;
        # the dicts/lists above are filled in place.
        for constraint in constraints:
            torch.export.dynamic_shapes._process_equalities(
                constraint,
                lambda t_id, dim: sources[(t_id, dim)],
                fake_mode.shape_env,
                names,
                source_pairs,
                derived_equalities,
                phantom_symbols,
            )

        equalities_inputs = EqualityConstraint(
            source_pairs=source_pairs,
            derived_equalities=derived_equalities,
            phantom_symbols=list(phantom_symbols.values()),
            warn_only=False,
        )
        return (
            fake_mode,
            fake_args,
            fake_kwargs,
            equalities_inputs,
            original_signature,
            transformed_dynamic_shapes,
        )
227
+
228
+
229
+ def _flatten_dynamic_shapes(
230
+ combined_args: Dict[str, Any],
231
+ dynamic_shapes: Union[Dict[str, Any], Tuple[Any], List[Any]],
232
+ ) -> List[Any]:
233
+ flat_shapes = []
234
+
235
+ def _tree_map_helper(path, t, shape):
236
+ nonlocal flat_shapes
237
+ flat_shapes.append(shape)
238
+
239
+ _tree_map_with_path(_tree_map_helper, combined_args, dynamic_shapes)
240
+ return flat_shapes
241
+
242
+
243
def produce_guards_and_solve_constraints(
    fake_mode: FakeTensorMode,
    gm: torch.fx.GraphModule,
    dynamic_shapes: Union[Dict[str, Any], Tuple[Any], List[Any], None],
    equalities_inputs: EqualityConstraint,
    original_signature: inspect.Signature,
    _is_torch_jit_trace=False,
):
    """
    Given a fake mode, sources pairs corresponding to equal dynamic shape dimensions,
    and a graph module, produce guards on the fake mode's shape env (raising constraint
    violations if any), solve (to suggest simplifications or fixes).
    Dynamo already performs this, so this is for non-strict mode.

    Additional inputs:
    equalities_inputs: the equality constraints to use for guards
    original_signature: the signature of the forward method

    Raises:
        ConstraintViolationError: when guards conflict with user constraints
            or specializations were forced.
    """
    shape_env = fake_mode.shape_env
    assert shape_env is not None
    assert shape_env.tracked_fakes is not None

    placeholders = [tf.fake for tf in shape_env.tracked_fakes]
    sources = [tf.source for tf in shape_env.tracked_fakes]
    input_contexts = [tf.symbolic_context for tf in shape_env.tracked_fakes]
    # Defer raising: we still want to solve constraints so the eventual error
    # message can include suggested fixes.
    constraint_violation_error = None
    try:
        shape_env.produce_guards(
            placeholders,
            sources,
            input_contexts=input_contexts,
            equalities_inputs=equalities_inputs,
            ignore_static=False,
        )
    except ConstraintViolationError as e:
        constraint_violation_error = e

    # No further symbols/guards may be added after this point.
    shape_env.frozen = True
    dim_constraints = shape_env.dim_constraints
    if dim_constraints is None:
        # Expected when shape_env.produce_guards throws an early constraint violation error.
        # There is nothing to solve for in this case.
        # TODO(avik): Maybe record the constraint violation error instead and replay later?
        assert constraint_violation_error
        raise constraint_violation_error
    dim_constraints.solve()
    forced_specializations = dim_constraints.forced_specializations()
    if not _is_torch_jit_trace:
        msg = dim_constraints.prettify_results(
            original_signature,
            dynamic_shapes,
            constraint_violation_error,
            forced_specializations,
        )
    else:
        # FIXME(ycao): This is a hack to get around missing signature from ScriptMethod
        msg = "dummy constraint violation message"
    if constraint_violation_error:
        # Append the prettified suggestions to the original error text.
        constraint_violation_error.args = (constraint_violation_error.args[0] + msg,)
    elif forced_specializations:
        # Forced specializations alone are also treated as a violation.
        constraint_violation_error = ConstraintViolationError(msg)
    if constraint_violation_error:
        raise constraint_violation_error
306
+
307
+
308
def make_constraints(
    fake_mode: FakeTensorMode,
    gm: torch.fx.GraphModule,
    combined_args: Dict[str, Any],
    dynamic_shapes: Union[Dict[str, Any], Tuple[Any], List[Any], None],
    num_lifted_inputs: int,
):
    """
    Given a fake mode's shape env and user-specified dynamic shapes,
    return the resulting range constraints and equality constraints.

    Additional args:
    num_lifted_inputs: the number of non-user-input placeholder nodes in the graph
    (used only to enumerate the user-input nodes)
    """

    shape_env = fake_mode.shape_env
    assert shape_env is not None
    # Start from constraints the tracer recorded inline (e.g. torch._check calls).
    inline_constraints = gm.meta.get("inline_constraints", [])
    range_constraints = {
        symbol: inline_constraints[symbol] for symbol in inline_constraints
    }
    if not dynamic_shapes:
        return range_constraints

    # get individual dynamic shapes spec for each input
    if not isinstance(dynamic_shapes, dict):
        assert isinstance(dynamic_shapes, (tuple, list))
        # Align container type with dynamic_shapes so the pytree structures match.
        combined_args = type(dynamic_shapes)(combined_args.values())  # type: ignore[assignment, misc]
    flat_dynamic_shapes = _flatten_dynamic_shapes(combined_args, dynamic_shapes)

    # check number of shapes vs. number of inputs
    num_placeholders = [node.op == "placeholder" for node in gm.graph.nodes].count(True)
    assert len(flat_dynamic_shapes) == num_placeholders - num_lifted_inputs

    input_dims = defaultdict(list)
    free_symbols = set()
    for input_index, node in enumerate(gm.graph.nodes):
        # Skip lifted (non-user) placeholders and non-placeholder nodes.
        if input_index < num_lifted_inputs or node.op != "placeholder":
            continue
        if _is_constant_argument(node.meta["val"]) or isinstance(
            node.meta["val"], CustomObjArgument
        ):
            continue
        # User inputs follow the lifted ones, hence the index offset.
        shape_spec = flat_dynamic_shapes[input_index - num_lifted_inputs]
        for i, d in enumerate(node.meta["val"].shape):
            if isinstance(d, torch.SymInt) and not d.node.expr.is_number:
                # Look up the range constraint for the symbol corresponding to this shape dimension
                # and store it indexed by the symbolic expression corresponding to it.
                # NOTE(avik): Use node._expr instead of node.expr for the lookup here because
                # we want the symbol, not its replacement, which could be an expression. Maybe
                # there's a better way to do this, e.g., by (re)computing value ranges for expressions?
                dim = shape_spec[i] if shape_spec else None
                if dim is None or isinstance(dim, _DimHint):
                    range_constraints[d.node.expr] = shape_env.var_to_range[
                        d.node._expr
                    ]
                else:
                    # An explicit Dim carries its own min/max bounds.
                    range_constraints[d.node.expr] = ValueRanges(
                        lower=dim.min, upper=dim.max
                    )
                input_dims[d.node.expr].append(InputDim(input_name=node.name, dim=i))
                free_symbols.update(d.node.expr.free_symbols)

    for symbol in free_symbols:
        if symbol not in range_constraints:
            # Placeholders can have symbolic shapes that are derived expressions.
            # The above code will record direct range constraints for them
            # so that we can do runtime assertions. In addition, for serde checks
            # we want to record range constraints for their root symbols.
            range_constraints[symbol] = shape_env.var_to_range[symbol]

    return range_constraints
381
+
382
+
383
+ def _gather_constant_attrs(m: torch.nn.Module) -> ConstantAttrMap:
384
+ """Search the module hierarchy, gathering up all tensor and ScriptObject constants.
385
+
386
+ Returns a dictionary mapping hash(value) to the name of the constant. We
387
+ have to abuse `hash` here unfortunately, see: [ScriptObject hash].
388
+ """
389
+ constants = ConstantAttrMap()
390
+ buffers_parameters = set(m.buffers())
391
+ buffers_parameters.update(m.parameters())
392
+
393
+ def inner(m: torch.nn.Module, prefix_atoms: List[str], constants):
394
+ for k, v in m.__dict__.items():
395
+ if isinstance(
396
+ v,
397
+ (
398
+ torch.Tensor,
399
+ torch.ScriptObject,
400
+ FakeScriptObject,
401
+ ),
402
+ ):
403
+ if v in buffers_parameters:
404
+ # filter out buffers and parameters, leaving only constants
405
+ continue
406
+
407
+ fqn = ".".join(prefix_atoms + [k])
408
+ constants.add(v, fqn)
409
+ for k, v in m.named_children():
410
+ inner(v, prefix_atoms + [k], constants)
411
+
412
+ inner(m, [], constants)
413
+ return constants
414
+
415
+
416
@contextlib.contextmanager
def _fakify_script_objects(
    mod: torch.nn.Module,
    args: Tuple[Any],
    kwargs: Dict[Any, Any],
    fake_mode: torch._subclasses.fake_tensor.FakeTensorMode,
):
    """Temporarily replace ScriptObjects in ``mod``/``args``/``kwargs`` with fakes.

    On exit, all patched module attributes are restored to the real objects.
    """
    # This context manager is used to fakify script objects into FakeScriptObject.
    # Inputs:
    #   mod: the module to be exported, it (and its recursive submodules)'s script object attrs haven't been fakified.
    #   args, kwargs: the args and kwargs inputs for mod, script object inputs haven't been fakified.
    #   fake_mode: the fake mode to be used for fakifying script objects. It's the same mode that fakify input tensors.
    #
    # Returns:
    #   mod: the patched module, its (and its recursive submodules) script object attrs have been fakified.
    #   fake_args, fake_kwargs: new fakified args and kwargs.
    #       Script object inputs have been fakified. Don't touch the tensors.
    #   fake_constant_attrs: a new map from FakeScriptObject to the fqn of the original script object.
    #   fake_to_real: a mapping between FakeScriptObject and the original script object in order to un-do the patching.

    constant_attrs: ConstantAttrMap = _gather_constant_attrs(mod)
    assert not any(
        isinstance(obj, FakeScriptObject) for obj in constant_attrs.values()
    ), "Mod shouldn't contain any FakeScriptObject."
    assert not pytree.tree_any(
        lambda obj: isinstance(obj, FakeScriptObject), (args, kwargs)
    ), "args and kwargs shouldn't contain any FakeScriptObject."

    # fqn -> original object, used to undo the patching in the finally block.
    patched_attr = {}
    fake_constant_attrs = ConstantAttrMap()
    fake_to_real = {}

    def _maybe_fakify_obj(obj):
        # Fakify one ScriptObject and remember the reverse mapping.
        fake_obj = torch._library.fake_class_registry.maybe_to_fake_obj(fake_mode, obj)
        fake_to_real[fake_obj] = obj
        return fake_obj

    def _leaf_mod_and_attr(
        mod: torch.nn.Module, attr_fqn: str
    ) -> Tuple[torch.nn.Module, str]:
        # Resolve a dotted fqn to (owning submodule, final attribute name).
        *prefix_attr, last_attr = attr_fqn.split(".")
        cur_mod = mod
        for attr in prefix_attr:
            cur_mod = getattr(cur_mod, attr)
        return cur_mod, last_attr

    try:
        for obj, fqns in constant_attrs.items():
            if isinstance(obj, torch.ScriptObject):
                fake_script_obj = _maybe_fakify_obj(obj)
                for fqn in fqns:
                    cur_mod, attr = _leaf_mod_and_attr(mod, fqn)
                    assert obj is getattr(cur_mod, attr)
                    setattr(cur_mod, attr, fake_script_obj)
                    fake_constant_attrs.add(fake_script_obj, fqn)
                    patched_attr[fqn] = obj
            else:
                # Non-ScriptObject constants (tensors etc.) are passed through.
                for fqn in fqns:
                    fake_constant_attrs.add(obj, fqn)

        # Fakify ScriptObjects appearing directly in the inputs as well.
        fake_args, fake_kwargs = pytree.tree_map_only(
            torch.ScriptObject, _maybe_fakify_obj, (args, kwargs)
        )
        yield (mod, fake_args, fake_kwargs, fake_constant_attrs, fake_to_real)
    finally:
        # Restore the original objects even if the body raised.
        for fqn, orig_obj in patched_attr.items():
            cur_mod, attr = _leaf_mod_and_attr(mod, fqn)
            setattr(cur_mod, attr, orig_obj)
484
+
485
+
486
+ class _NonStrictTorchFunctionHandler(torch.overrides.TorchFunctionMode):
487
+ """
488
+ 1. Handles data-dependent errors raised by torch function calls in non-strict.
489
+
490
+ Any data-dependent error is due to some condition on unbacked symints
491
+ that cannot be resolved. A mechanical way of fixing the error is to use
492
+ a torch._check() call to assert either that condition or its negation.
493
+ The handler suggests these options as code and points to the location
494
+ of the torch function call that raised the error as part of the error
495
+ message shown to the user, who can then simply select and copy-paste
496
+ a suggested fix at that location.
497
+
498
+ NOTE: Not all data-dependent errors are raised by torch function calls.
499
+ In particular, conditions on unbacked symints can appear outside such
500
+ calls, and as such are not handled here.
501
+
502
+ 2. Handles line-of-code logging for each torch function call in non-strict.
503
+
504
+ Usage: TORCHEXPORT_EXTENDED_DEBUG_CURRENT_LOC=1 TORCH_LOGS="+export" ...
505
+ """
506
+
507
+ def __torch_function__(self, func, types, args=(), kwargs=None):
508
+ kwargs = kwargs or {}
509
+ if log.isEnabledFor(logging.DEBUG) and config.extended_debug_current_loc:
510
+ frame = _find_user_code_frame()
511
+ if frame is not None:
512
+ log.debug(
513
+ "%s called at %s:%s in %s",
514
+ func.__qualname__,
515
+ frame.f_code.co_filename,
516
+ frame.f_lineno,
517
+ frame.f_code.co_name,
518
+ )
519
+ try:
520
+ return func(*args, **kwargs)
521
+ except GuardOnDataDependentSymNode as e:
522
+ _suggest_fixes_for_data_dependent_error_non_strict(e)
523
+ raise
.venv/lib/python3.11/site-packages/torch/_export/pass_base.py ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import operator
3
+ import traceback
4
+ import typing
5
+ from contextlib import nullcontext
6
+ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
7
+
8
+ import torch
9
+ from functorch.experimental.control_flow import _unstack_pytree
10
+ from torch import fx
11
+ from torch._dispatch.python import enable_python_dispatcher
12
+ from torch._export.pass_infra.node_metadata import NodeMetadata
13
+ from torch._export.pass_infra.proxy_value import ProxyValue
14
+ from torch._subclasses import FakeTensor, UnsupportedFakeTensorException
15
+ from torch._subclasses.fake_tensor import FakeTensorMode
16
+ from torch.fx import traceback as fx_traceback
17
+ from torch.fx.experimental.proxy_tensor import PythonKeyTracer
18
+ from torch.fx.graph import CodeGen
19
+ from torch.fx.passes.infra.pass_base import PassBase, PassResult
20
+ from torch.fx.passes.shape_prop import _extract_tensor_metadata, TensorMetadata
21
+ from torch.utils import _pytree as pytree
22
+ from torch.fx.experimental.symbolic_shapes import PropagateUnbackedSymInts, compute_unbacked_bindings
23
+
24
+
25
+ __all__ = ["_ExportPassBaseDeprecatedDoNotUse"]
26
+
27
+
28
# Type aliases used by the pass base plumbing below.
Argument = Any  # any value that can appear as an FX node argument
Value = Any  # any runtime value flowing through the interpreter
Fn = Callable[..., Any]
# A pass: takes a GraphModule, optionally returns a PassResult.
PassType = Callable[[torch.fx.GraphModule], Optional[PassResult]]


# torch.sym_* helpers that are dispatched via call_sym() rather than
# call_operator() during interpretation.
_TORCH_SYM_OPS: Set[Callable] = {
    torch.sym_int,
    torch.sym_float,
    torch.sym_ite,
    torch.sym_max,
    torch.sym_min,
    torch.sym_not,
    torch.sym_sqrt,
}
43
+
44
+
45
class ExportPassBaseError(RuntimeError):
    """Raised when the export pass infrastructure is misused or encounters
    an unsupported construct."""

    pass
47
+
48
+
49
+ class _ExportPassBaseDeprecatedDoNotUse(PassBase):
50
+ """
51
+ Interpreter-based pass class to help users maintain the IR spec while writing
52
+ transformations.
53
+ """
54
+
55
    @staticmethod
    def _create_dummy_node_metadata():
        # Fabricate minimal NodeMetadata (just a stack trace) for nodes the
        # pass creates from scratch, so consumers always find metadata.
        return NodeMetadata({"stack_trace": "".join(traceback.format_stack(limit=1))})
58
+
59
+
60
    class ExportTracer(PythonKeyTracer):
        """Tracer that records the callback's emitted nodes into a fresh graph.

        Unlike a normal tracer, it is driven externally (trace() is disabled)
        and is responsible for propagating fake-tensor / tensor metadata onto
        every node it creates.
        """

        def __init__(self, callback: "_ExportPassBaseDeprecatedDoNotUse", codegen: CodeGen) -> None:
            super().__init__()
            self.callback = callback
            self.root = torch.nn.Module()
            self.graph = torch.fx.Graph()
            self.graph.set_codegen(codegen)
            self.tensor_attrs: Dict[str, torch.Tensor] = {}  # type: ignore[assignment]
            # Set by the caller before interpretation; used to fakify values
            # when propagating metadata.
            self.fake_tensor_mode: Optional[FakeTensorMode] = None
            # Maps submodule object -> generated attribute name on self.root.
            self.submodules: Dict[torch.nn.Module, str] = {}

        def trace(self) -> None:  # type: ignore[override]
            # This tracer is driven by the interpreter, not by tracing a callable.
            raise ExportPassBaseError("ExportTracer doesn't support trace().")

        def create_arg(self, a: Argument) -> torch.fx.Node:
            if isinstance(a, torch.nn.Module):
                # Register unseen submodules on the new root with a fresh name.
                if a not in self.submodules:
                    name_submodule = f"submodule_{len(self.submodules)}"
                    self.root.add_module(name_submodule, a)
                    self.submodules[a] = name_submodule
            elif isinstance(a, FakeTensor):
                # Only fake tensors backed by a real constant can be embedded.
                if not hasattr(a, "constant") or a.constant is None:
                    raise ExportPassBaseError(f"Cannot add {a} to graph.")
                a = a.constant
            node = super().create_arg(a)
            if (
                isinstance(a, torch.Tensor)
                and isinstance(node, torch.fx.Node)
                and node.op == "get_attr"
            ):
                # Newly lifted tensor attribute: attach metadata and notify
                # the callback so it can track the attribute.
                self.set_metadata(node, a)
                self.callback.on_attr(ProxyValue(a, node))
            return node

        def set_metadata(
            self, node: torch.fx.Node, value: Argument,
        ) -> None:
            # propagate the fake tensor or sym nodes
            def make_val(
                x: Argument,
            ) -> Union[FakeTensor, torch.SymInt, torch.SymFloat, torch.SymBool, int, float, bool, str, None]:
                if isinstance(x, FakeTensor):
                    return x
                elif isinstance(x, torch.Tensor):
                    if x.is_quantized:
                        # TODO (tmanlaibaatar) properly support Quantized FakeTensor
                        x = torch.dequantize(x)

                    try:
                        assert self.fake_tensor_mode is not None
                        # TODO we should allocate static shapes
                        # for param/buffer values
                        if isinstance(x, torch.nn.Parameter):
                            fake_tensor = self.fake_tensor_mode.from_tensor(
                                x, static_shapes=True
                            )
                        else:
                            fake_tensor = self.fake_tensor_mode.from_tensor(x)
                    except UnsupportedFakeTensorException:
                        # TODO: This is just a workaround to get over the
                        # x.as_subclass error
                        print(
                            "Fakeifying a Tensor subclass is not supported \
                            right now. Instead a TensorMetadata is used."
                        )
                        fake_tensor = None
                    return fake_tensor
                elif isinstance(x, (torch.SymInt, torch.SymFloat, torch.SymBool, int, float, bool, str)):
                    return x
                else:
                    return None

            node.meta["val"] = pytree.tree_map(make_val, value)

            # Set the tensor_metadata for values that do not have a corresponding FakeTensor
            def make_tensor_meta(x: Argument) -> Optional[TensorMetadata]:
                if not isinstance(x, FakeTensor) and isinstance(x, torch.Tensor):
                    if x.is_quantized:
                        # TODO (tmanlaibaatar) properly support Quantized FakeTensor
                        x = torch.dequantize(x)

                    try:
                        assert self.fake_tensor_mode is not None
                        _ = self.fake_tensor_mode.from_tensor(x)
                        # Fakification succeeded, so node.meta["val"] already
                        # carries the FakeTensor; no extra metadata needed.
                        tensor_meta = None
                    except UnsupportedFakeTensorException:
                        # TODO: This is just a workaround to get over the
                        # x.as_subclass error
                        tensor_meta = _extract_tensor_metadata(x)
                    return tensor_meta
                else:
                    return None

            node.meta["tensor_meta"] = pytree.tree_map(make_tensor_meta, value)
154
+
155
+ class ExportInterpreter(fx.Interpreter):
156
+ def __init__(self, callback: "_ExportPassBaseDeprecatedDoNotUse", gm: fx.GraphModule) -> None:
157
+ super().__init__(gm)
158
+ self.callback = callback
159
+ self.node: torch.fx.Node = next(iter(gm.graph.nodes))
160
+
161
+ def placeholder(
162
+ self,
163
+ target: str, # type: ignore[override]
164
+ args: Tuple[Argument, ...],
165
+ kwargs: Dict[str, Argument],
166
+ ) -> ProxyValue:
167
+ arg = super().placeholder(target, args, kwargs)
168
+ return self.callback.placeholder(target, arg, NodeMetadata(self.node.meta))
169
+
170
+ def output(
171
+ self,
172
+ target: torch.fx.node.Target,
173
+ args: Tuple[Argument, ...],
174
+ kwargs: Dict[str, Argument],
175
+ ) -> ProxyValue:
176
+ return self.callback.output(args[0], NodeMetadata(self.node.meta)).data
177
+
178
+ def call_function(
179
+ self,
180
+ target: torch.fx.node.Target,
181
+ args: Tuple[Argument, ...],
182
+ kwargs: Dict[str, Argument],
183
+ ) -> ProxyValue:
184
+ meta = NodeMetadata(self.node.meta)
185
+
186
+ if target == operator.getitem:
187
+ value, key = args
188
+ return self.callback.call_getitem(value, key, meta)
189
+ elif getattr(target, "__module__", None) in {"_operator", "math"}:
190
+ assert callable(target)
191
+ return self.callback.call_sym(target, args, meta)
192
+ elif target in _TORCH_SYM_OPS:
193
+ assert callable(target)
194
+ return self.callback.call_sym(target, args, meta)
195
+ elif isinstance(target, (torch._ops.OpOverload, torch._ops.OpOverloadPacket)):
196
+ return self.callback.call_operator(
197
+ target,
198
+ args,
199
+ kwargs,
200
+ meta,
201
+ )
202
+ elif target == torch.ops.higher_order.cond:
203
+ pred, true_fn, false_fn, inputs = args
204
+ return self.callback.call_cond(pred, true_fn, false_fn, inputs, meta)
205
+ elif target == torch.ops.higher_order.map_impl:
206
+ f, mapped_args, operands = args # type: ignore[assignment]
207
+ return self.callback.call_map(f, mapped_args, operands, meta)
208
+ # For other unregistered HigherOrderOps, just interpret them blindly
209
+ elif isinstance(target, torch._ops.HigherOrderOperator):
210
+ return self.callback._fx(
211
+ "call_function",
212
+ target,
213
+ args,
214
+ kwargs,
215
+ meta,
216
+ )
217
+ else:
218
+ raise ExportPassBaseError(f"Unsupported target type: {target}")
219
+
220
+ def get_attr(
221
+ self, target: str, args: Tuple[Argument, ...], kwargs: Dict[str, Argument] # type: ignore[override]
222
+ ) -> Argument:
223
+ return super().get_attr(target, args, kwargs)
224
+
225
+ def call_module(
226
+ self,
227
+ target: torch.fx.node.Target,
228
+ args: Tuple[Argument, ...],
229
+ kwargs: Dict[str, Argument],
230
+ ) -> None:
231
+ raise ExportPassBaseError("call_module is not supported.")
232
+
233
+ def call_method(
234
+ self, target: str, args: Tuple[Argument, ...], kwargs: Dict[str, Argument] # type: ignore[override]
235
+ ) -> None:
236
+ raise ExportPassBaseError("call_method is not supported.")
237
+
238
+ def run_node(self, n: torch.fx.Node) -> Argument:
239
+ self.node = n
240
+ self.callback.node_debug_str = n.format_node()
241
+ return super().run_node(n)
242
+
243
+ def __init__(self) -> None:
244
+ self.interpreter = PropagateUnbackedSymInts(
245
+ torch.fx.GraphModule(torch.nn.Module(), torch.fx.Graph())
246
+ )
247
+ self.tracer = self.ExportTracer(self, CodeGen())
248
+ self.fake_tensor_mode: Optional[FakeTensorMode] = None
249
+ self._initialized = True
250
+ self.node_debug_str: typing.Optional[str] = None
251
+
252
+ def _fx(
253
+ self,
254
+ kind: str,
255
+ target: torch.fx.node.Target,
256
+ args: Tuple[Argument, ...],
257
+ kwargs: Dict[str, Argument],
258
+ meta: NodeMetadata,
259
+ ) -> ProxyValue:
260
+ args_data, kwargs_data = pytree.tree_map_only(
261
+ ProxyValue, lambda x: x.data, (args, kwargs)
262
+ )
263
+ res_data = getattr(self.interpreter, kind)(target, args_data, kwargs_data)
264
+ args_proxy, kwargs_proxy = pytree.tree_map_only(
265
+ ProxyValue, lambda x: x.proxy, (args, kwargs)
266
+ )
267
+
268
+ name = None
269
+ if isinstance(target, torch._ops.OpOverload):
270
+ name = self.tracer.graph._target_to_str(target.overloadpacket.__name__)
271
+
272
+ res_proxy = self.tracer.create_proxy(kind, target, args_proxy, kwargs_proxy, name=name)
273
+ res_proxy.node.meta.update(meta.data)
274
+ if self.fake_tensor_mode and (shape_env := self.fake_tensor_mode.shape_env):
275
+ if symbol_to_path := compute_unbacked_bindings(shape_env, res_data):
276
+ res_proxy.node.meta["unbacked_bindings"] = symbol_to_path
277
+ self.tracer.set_metadata(res_proxy.node, res_data)
278
+ return ProxyValue(res_data, res_proxy)
279
+
280
+ def inputs(self, graph_module: torch.fx.GraphModule) -> List[Argument]:
281
+ # TODO(angelayi): Update this with what we decide to do for metadata in
282
+ # the exported graph module
283
+ if (args := graph_module.meta.get("args", None)) is not None:
284
+ return list(args)
285
+
286
+ def extract_input(node: torch.fx.Node) -> Optional[FakeTensor]:
287
+ if "val" in node.meta:
288
+ fake = node.meta["val"]
289
+ if hasattr(fake, "constant") and fake.constant is not None:
290
+ return fake.constant
291
+ return fake
292
+ elif tensor_meta := node.meta.get("tensor_meta"):
293
+ assert self.fake_tensor_mode is not None
294
+ return FakeTensor(
295
+ self.fake_tensor_mode,
296
+ torch.empty(
297
+ tensor_meta.shape,
298
+ dtype=tensor_meta.dtype,
299
+ device="meta",
300
+ requires_grad=tensor_meta.requires_grad,
301
+ memory_format=tensor_meta.memory_format,
302
+ ),
303
+ torch.device("cpu"),
304
+ )
305
+ elif len(node.users) == 0:
306
+ return None
307
+ raise ExportPassBaseError(
308
+ f"Cannot construct an input for graph module: {graph_module}.",
309
+ )
310
+
311
+ return [
312
+ extract_input(node)
313
+ for node in graph_module.graph.nodes
314
+ if node.op == "placeholder"
315
+ ]
316
+
317
+ def on_attr(self, attr: ProxyValue) -> None:
318
+ pass
319
+
320
+ def placeholder(self, name: str, arg: Argument, meta: NodeMetadata) -> ProxyValue:
321
+ arg_proxy = self.tracer.create_proxy("placeholder", name, (), {})
322
+ arg_proxy.node.meta = meta.data
323
+ self.tracer.set_metadata(arg_proxy.node, arg)
324
+ return ProxyValue(arg, arg_proxy)
325
+
326
+ def call_operator(
327
+ self,
328
+ op,
329
+ args: Tuple[Argument, ...],
330
+ kwargs: Dict[str, Argument],
331
+ meta: NodeMetadata,
332
+ ) -> ProxyValue:
333
+ return self._fx("call_function", op, args, kwargs, meta)
334
+
335
+ def call_sym(
336
+ self,
337
+ target: Fn,
338
+ args: Tuple[Argument, ...],
339
+ meta: NodeMetadata,
340
+ ) -> ProxyValue:
341
+ return self._fx("call_function", target, args, {}, meta)
342
+
343
+ def call_cond(
344
+ self,
345
+ pred: ProxyValue,
346
+ true_fn: torch.fx.GraphModule,
347
+ false_fn: torch.fx.GraphModule,
348
+ inputs: List[Argument],
349
+ meta: NodeMetadata,
350
+ ) -> ProxyValue:
351
+ true_branch = self.call_submodule(true_fn, tuple(inputs))
352
+ false_branch = self.call_submodule(false_fn, tuple(inputs))
353
+ assert true_branch is not None
354
+ assert false_branch is not None
355
+ return self._fx(
356
+ "call_function",
357
+ torch.ops.higher_order.cond,
358
+ (pred, true_branch.graph_module, false_branch.graph_module, list(inputs)),
359
+ {},
360
+ meta,
361
+ )
362
+
363
+ def call_map(
364
+ self,
365
+ f: torch.fx.GraphModule,
366
+ mapped_args: List[ProxyValue],
367
+ operands: List[ProxyValue],
368
+ meta: NodeMetadata,
369
+ ) -> ProxyValue:
370
+ xs = _unstack_pytree([arg.data for arg in mapped_args])[0]
371
+ f_branch = self.call_submodule(f, tuple(xs + [arg.data for arg in operands]))
372
+ assert f_branch is not None
373
+ return self._fx(
374
+ "call_function",
375
+ torch.ops.higher_order.map_impl,
376
+ (f_branch.graph_module, mapped_args, operands),
377
+ {},
378
+ meta,
379
+ )
380
+
381
+ def call_getitem(
382
+ self, value: ProxyValue, key: int, meta: NodeMetadata
383
+ ) -> ProxyValue:
384
+ return self._fx("call_function", operator.getitem, (value, key), {}, meta)
385
+
386
+ def output(self, results: List[Argument], meta: NodeMetadata) -> ProxyValue:
387
+ return self._fx("output", "output", (results,), {}, meta)
388
+
389
+ def call_submodule(
390
+ self, graph_module: fx.GraphModule, inputs: Tuple[Argument, ...]
391
+ ) -> PassResult:
392
+ prev_tracer, self.tracer = self.tracer, self.ExportTracer(
393
+ self, graph_module.graph._codegen
394
+ )
395
+ self.tracer.fake_tensor_mode = prev_tracer.fake_tensor_mode
396
+ interpreter = self.ExportInterpreter(self, graph_module)
397
+ prev_interpreter, self.interpreter = self.interpreter, torch.fx.Interpreter( # type: ignore[assignment]
398
+ torch.fx.GraphModule(torch.nn.Module(), torch.fx.Graph())
399
+ )
400
+ inputs_data = pytree.tree_map_only(ProxyValue, lambda x: x.data, inputs)
401
+ with fx_traceback.preserve_node_meta():
402
+ interpreter.run(*inputs_data)
403
+
404
+ new_graph_module = torch.fx.GraphModule(self.tracer.root, self.tracer.graph)
405
+
406
+ self.tracer = prev_tracer
407
+ self.interpreter = prev_interpreter
408
+ return PassResult(
409
+ new_graph_module,
410
+ True,
411
+ )
412
+
413
+ def call(self, graph_module: fx.GraphModule) -> PassResult:
414
+ if not getattr(self, "_initialized", False):
415
+ raise ExportPassBaseError(
416
+ "ExportPass is not initialized with __init__().",
417
+ )
418
+
419
+ inputs = self.inputs(graph_module)
420
+
421
+ fake_tensor_mode = None
422
+ for i in inputs:
423
+ if isinstance(i, FakeTensor):
424
+ assert (
425
+ fake_tensor_mode is None or fake_tensor_mode is i.fake_mode
426
+ ), "Multiple fake tensor mode detected."
427
+ fake_tensor_mode = i.fake_mode
428
+ if fake_tensor_mode is None:
429
+ self.tracer.fake_tensor_mode = FakeTensorMode(allow_non_fake_inputs=True)
430
+ fake_tensor_mode = nullcontext() # type: ignore[assignment]
431
+ dispatcher_mode = nullcontext() # type: ignore[assignment]
432
+ else:
433
+ fake_tensor_mode.allow_non_fake_inputs = True
434
+ self.tracer.fake_tensor_mode = fake_tensor_mode
435
+ dispatcher_mode = enable_python_dispatcher() # type: ignore[assignment]
436
+ self.fake_tensor_mode = self.tracer.fake_tensor_mode
437
+
438
+ with fake_tensor_mode, dispatcher_mode: # type: ignore[assignment, union-attr]
439
+ result = self.call_submodule(graph_module, tuple(inputs))
440
+
441
+ return result
.venv/lib/python3.11/site-packages/torch/_export/tools.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import logging
3
+ import warnings
4
+ from typing import Any, Dict, Iterable, Optional, Tuple
5
+
6
+ import torch
7
+ import torch.export
8
+ import torch.export._trace
9
+ from torch._utils_internal import log_export_usage
10
+
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
+ __all__ = ["report_exportability"]
15
+
16
+
17
+ def _generate_inputs_for_submodules(
18
+ model: torch.nn.Module,
19
+ target_submodules: Iterable[str],
20
+ args: Tuple[Any, ...],
21
+ kwargs: Optional[Dict[str, Any]] = None,
22
+ ) -> Dict[str, Tuple[Any, Any]]:
23
+ """
24
+ Generate inputs for targeting submdoules in the given model. Note that if two submodules refer to the same obj, this
25
+ function doesn't work.
26
+
27
+ Args:
28
+ model: root model.
29
+ inputs: inputs to the root model.
30
+ target_submodules: submodules that we want to generate inputs for.
31
+
32
+ Returns:
33
+ A dict that maps from submodule name to its inputs.
34
+ """
35
+ kwargs = kwargs or {}
36
+
37
+ handles = []
38
+ results = {}
39
+ submodule_to_names = {mod: name for name, mod in model.named_modules()}
40
+
41
+ def pre_forward(module, module_args, module_kwargs):
42
+ results[submodule_to_names[module]] = (module_args, module_kwargs)
43
+
44
+ try:
45
+ for name, mod in model.named_modules():
46
+ if name in target_submodules:
47
+ handles.append(
48
+ mod.register_forward_pre_hook(pre_forward, with_kwargs=True)
49
+ )
50
+ model(*args, **kwargs)
51
+ except Exception as e:
52
+ warnings.warn(
53
+ f"Failed to generate submodule inputs because of the following error:\n{e}"
54
+ )
55
+ finally:
56
+ for h in handles:
57
+ h.remove()
58
+ return results
59
+
60
+
61
+ def report_exportability(
62
+ mod: torch.nn.Module,
63
+ args: Tuple[Any, ...],
64
+ kwargs: Optional[Dict[str, Any]] = None,
65
+ *,
66
+ strict: bool = True,
67
+ pre_dispatch: bool = False,
68
+ ) -> Dict[str, Optional[Exception]]:
69
+ """
70
+ Report exportability issues for a module in one-shot.
71
+
72
+ Args:
73
+ mod: root module.
74
+ args: args to the root module.
75
+ kwargs: kwargs to the root module.
76
+ Returns:
77
+ A dict that maps from submodule name to the exception that was raised when trying to export it.
78
+ `None` means the module is exportable without issue.
79
+ Sample output:
80
+ {
81
+ '': UnsupportedOperatorException(func=<OpOverload(op='testlib.op_missing_meta', overload='default')>),
82
+ 'submod_1': UnsupportedOperatorException(func=<OpOverload(op='testlib.op_missing_meta', overload='default')>),
83
+ 'submod_2': None
84
+ }
85
+ """
86
+
87
+ log_export_usage(event="export.report_exportability")
88
+
89
+ kwargs = kwargs or {}
90
+
91
+ all_submod_names = [name for name, _ in mod.named_modules() if name != ""]
92
+ submod_inputs = _generate_inputs_for_submodules(mod, all_submod_names, args, kwargs)
93
+
94
+ tried_module_types = set()
95
+ report: Dict[str, Optional[Exception]] = {}
96
+
97
+ def try_export(module, module_name, args, kwargs):
98
+ nonlocal submod_inputs, report, strict, pre_dispatch, tried_module_types
99
+
100
+ if type(module) in tried_module_types:
101
+ return
102
+ tried_module_types.add(type(module))
103
+
104
+ if args is not None or kwargs is not None:
105
+ try:
106
+ torch.export._trace._export(
107
+ module,
108
+ args,
109
+ kwargs,
110
+ strict=strict,
111
+ pre_dispatch=pre_dispatch,
112
+ )
113
+ report[module_name] = None
114
+ log.info("Successfully exported `%s`", module_name)
115
+ return
116
+ except Exception as e:
117
+ short_msg = repr(e).split("\n")[0]
118
+ log.warning(
119
+ "Failed exporting `%s` with exception: %s", module_name, short_msg
120
+ )
121
+ report[module_name] = e
122
+
123
+ for name, submod in module.named_children():
124
+ sub_module_name = name if module_name == "" else f"{module_name}.{name}"
125
+
126
+ submod_args, submod_kwargs = submod_inputs.get(
127
+ sub_module_name, (None, None)
128
+ )
129
+
130
+ try_export(submod, sub_module_name, submod_args, submod_kwargs)
131
+
132
+ return
133
+
134
+ try_export(mod, "", args, kwargs)
135
+
136
+ unique_issues = set()
137
+ for exception in report.values():
138
+ if exception is not None:
139
+ key = repr(exception).split("\\n")[0]
140
+ unique_issues.add(key)
141
+
142
+ log.warning("Found %d export issues:", len(unique_issues))
143
+ for issue in unique_issues:
144
+ log.warning(issue)
145
+
146
+ return report
.venv/lib/python3.11/site-packages/torch/_export/verifier.py ADDED
@@ -0,0 +1,456 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import inspect
3
+ import math
4
+ import operator
5
+ from collections.abc import Iterable
6
+ from typing import Any, Dict, final, List, Tuple, Type, TYPE_CHECKING
7
+
8
+ import torch
9
+ from torch._ops import HigherOrderOperator, OpOverload
10
+ from torch._subclasses.fake_tensor import FakeTensor
11
+ from torch.export.graph_signature import (
12
+ CustomObjArgument,
13
+ InputKind,
14
+ SymIntArgument,
15
+ TensorArgument,
16
+ TokenArgument,
17
+ )
18
+ from torch.fx import GraphModule
19
+
20
+ if TYPE_CHECKING:
21
+ from torch.export.exported_program import ExportedProgram
22
+
23
+ class SpecViolationError(Exception):
24
+ pass
25
+
26
+
27
+ def is_functional(op: OpOverload) -> bool:
28
+ return not op._schema.is_mutable
29
+
30
+
31
+ def _check_has_fake_tensor(node: torch.fx.Node) -> None:
32
+ # TODO(angelayi): remove this in favor of _check_val
33
+ return _check_val(node)
34
+
35
+
36
+ def _check_val(node: torch.fx.Node) -> None:
37
+ from torch.fx.experimental.symbolic_shapes import SymBool, SymFloat, SymInt
38
+
39
+ def _check_correct_val(val):
40
+ if val is None:
41
+ return True
42
+ elif isinstance(val, (int, bool, str, float)):
43
+ return True
44
+ elif isinstance(val, (torch.memory_format, torch.dtype, torch.device, torch.layout)):
45
+ return True
46
+ elif isinstance(val, (FakeTensor, torch.Tensor)): # TODO(zhxchen17) Remove Tensor.
47
+ return True
48
+ elif isinstance(val, (SymInt, SymFloat, SymBool)):
49
+ return True
50
+ elif isinstance(val, CustomObjArgument):
51
+ return True
52
+ elif isinstance(val, Iterable):
53
+ return all(_check_correct_val(x) for x in val)
54
+ return False
55
+
56
+ def _no_returns(op):
57
+ if not isinstance(op, OpOverload):
58
+ return False
59
+ return len(op._schema.returns) == 0
60
+
61
+ if "val" not in node.meta:
62
+ if node.op == "call_function" and _no_returns(node.target):
63
+ return
64
+ raise SpecViolationError(f"Node.meta {node.name} is missing val field.")
65
+
66
+ val = node.meta["val"]
67
+ if not _check_correct_val(val):
68
+ raise SpecViolationError(f"Node.meta {node.name} has invalid val field {val}")
69
+
70
+
71
+ def _check_torch_fn(node: torch.fx.Node) -> None:
72
+ torch_fn = node.meta.get("torch_fn")
73
+ if torch_fn is None:
74
+ raise SpecViolationError(f"Unable to find torch_fn metadata for node {node.name}")
75
+ if (
76
+ not isinstance(torch_fn, tuple) and
77
+ isinstance(torch_fn[0], str) and
78
+ isinstance(torch_fn[1], str)
79
+ ):
80
+ raise SpecViolationError(f"Node.meta {node.name} has invalid torch_fn field {torch_fn}")
81
+
82
+ class _VerifierMeta(type):
83
+ _registry: Dict[str, Type['Verifier']] = {}
84
+
85
+ def __new__(metacls, name, bases, attrs):
86
+ if bases:
87
+ if "check" in attrs or "_check_graph_module" in attrs:
88
+ raise SyntaxError("Overriding method check is not allowed.")
89
+ assert "dialect" in attrs and attrs["dialect"] != "ATEN"
90
+ else:
91
+ assert "check" in attrs
92
+ assert "_check_graph_module" in attrs
93
+ assert attrs["dialect"] == "ATEN"
94
+
95
+ assert isinstance(attrs["dialect"], str)
96
+ ret = type.__new__(metacls, name, bases, attrs)
97
+ metacls._registry[attrs["dialect"]] = ret # type: ignore[assignment]
98
+ return ret
99
+
100
+ def getattr_recursive(obj: Any, target: str) -> Any:
101
+ target_atoms = target.split('.')
102
+ attr_itr = obj
103
+ for i, atom in enumerate(target_atoms):
104
+ if not hasattr(attr_itr, atom):
105
+ raise RuntimeError(f"Node referenced nonexistent target {'.'.join(target_atoms[:i])}")
106
+ attr_itr = getattr(attr_itr, atom)
107
+ return attr_itr
108
+
109
+
110
+ class Verifier(metaclass=_VerifierMeta):
111
+ dialect = "ATEN"
112
+
113
+ def allowed_builtin_ops(self) -> List:
114
+ return [
115
+ operator.getitem,
116
+ operator.add,
117
+ operator.mul,
118
+ operator.sub,
119
+ operator.truediv,
120
+ operator.ge,
121
+ operator.le,
122
+ operator.gt,
123
+ operator.lt,
124
+ operator.eq,
125
+ operator.ne,
126
+ operator.floordiv,
127
+ operator.mod,
128
+ operator.and_,
129
+ operator.or_,
130
+ operator.not_,
131
+ operator.pow,
132
+ operator.neg,
133
+ operator.abs,
134
+ math.ceil,
135
+ math.floor,
136
+ math.trunc,
137
+ ]
138
+
139
+ def allowed_op_types(self) -> Tuple[Type[Any], ...]:
140
+ return (OpOverload, HigherOrderOperator)
141
+
142
+ def allowed_getattr_types(self) -> Tuple[Type[Any], ...]:
143
+ return (torch.fx.GraphModule,)
144
+
145
+ def check_valid_op(self, op):
146
+ pass
147
+
148
+ def check_additional(self, gm: GraphModule) -> None:
149
+ """
150
+ Additional checks that are specific to some dialects.
151
+ """
152
+
153
+ @final
154
+ def check(self, ep: "ExportedProgram") -> None:
155
+ self._check_graph_module(ep.graph_module)
156
+ _verify_exported_program_module_call_graph(ep)
157
+ _verify_exported_program_signature(ep)
158
+
159
+ @final
160
+ def _check_graph_module(self, gm: torch.fx.GraphModule) -> None:
161
+ def _allowed_getattr_types() -> Tuple[Type[Any], ...]:
162
+ ret = self.allowed_getattr_types()
163
+ assert not any(t is object for t in ret)
164
+ return ret
165
+
166
+ def _check_valid_op(op) -> None:
167
+ def _allowed_builtin_ops() -> List:
168
+ ret = self.allowed_builtin_ops()
169
+ assert all(inspect.isbuiltin(op) for op in ret)
170
+ return ret
171
+
172
+ def _allowed_op_types() -> Tuple[Type[Any], ...]:
173
+ ret = self.allowed_op_types()
174
+ assert not any(t is object for t in ret)
175
+ return ret
176
+
177
+ # TODO Remove this allowlist.
178
+ _allowed_torch_functions = (
179
+ torch.autograd.grad_mode.set_grad_enabled,
180
+ torch.sym_int,
181
+ torch.sym_float,
182
+ torch.sym_ite,
183
+ torch.sym_max,
184
+ torch.sym_min,
185
+ torch.sym_not,
186
+ torch.sym_sqrt,
187
+ # TODO (tmanlaibaatar)
188
+ # Predispatch export is able to contain autograd ops.
189
+ # These will be modeled as HOO later
190
+ torch._C._set_grad_enabled,
191
+ )
192
+
193
+ if not isinstance(op, _allowed_op_types()):
194
+ if op not in _allowed_builtin_ops() and op not in _allowed_torch_functions:
195
+ raise SpecViolationError(
196
+ f"Operator '{op}' is not an allowed operator type: {_allowed_op_types()}\n"
197
+ f"Valid builtin ops: {_allowed_builtin_ops()}"
198
+ f"Valid torch functions: {_allowed_torch_functions}"
199
+ )
200
+
201
+ if isinstance(op, OpOverload):
202
+ # All ops functional
203
+ # TODO (tmanlaibaatar) more proper way is needed here
204
+ if self.dialect != "TRAINING" and not is_functional(op):
205
+ raise SpecViolationError(
206
+ f"operator '{op}' is not functional"
207
+ )
208
+ self.check_valid_op(op)
209
+
210
+ for mod in gm.modules():
211
+ if not isinstance(mod, torch.fx.GraphModule):
212
+ continue
213
+
214
+ mod.graph.lint()
215
+ for node in mod.graph.nodes:
216
+ # TODO(T140410192): should have fake tensor for all dialects
217
+ if node.op in {"call_module", "call_method"}:
218
+ raise SpecViolationError(
219
+ f"call_module is not valid: got a class '{node.target}' ",
220
+ )
221
+
222
+ elif node.op == "call_function":
223
+ _check_val(node)
224
+
225
+ _check_valid_op(node.target)
226
+
227
+ elif node.op == "get_attr":
228
+ if not isinstance(node.target, str):
229
+ raise SpecViolationError(
230
+ f"Expected get_attr target to be string, but got {type(node.target)}"
231
+ )
232
+
233
+ attr = getattr_recursive(mod, node.target)
234
+ if isinstance(attr, torch.nn.Module):
235
+ def _is_type(name, ty):
236
+ return isinstance(getattr(attr, name, None), ty)
237
+ if type(attr).__name__ == "LoweredBackendModule":
238
+ if _is_type("backend_id", str) \
239
+ and _is_type("processed_bytes", bytes) \
240
+ and _is_type("compile_specs", list) \
241
+ and hasattr(attr, "original_module"):
242
+ continue
243
+ else:
244
+ backend_id = getattr(attr, "backend_id", None)
245
+ processed_bytes = getattr(attr, "processed_bytes", None)
246
+ compile_specs = getattr(attr, "compile_specs", None)
247
+ raise SpecViolationError(
248
+ f"Invalid get_attr type {type(attr)}. \n"
249
+ f"LoweredBackendModule fields: "
250
+ f"backend_id(str) : {type(backend_id)}, "
251
+ f"processed_bytes(bytes) : {type(processed_bytes)}, "
252
+ f"compile_specs(list) : {type(compile_specs)}"
253
+ )
254
+
255
+ if not isinstance(attr, _allowed_getattr_types()):
256
+ raise SpecViolationError(
257
+ f"Invalid get_attr type {type(attr)}. \n"
258
+ f"Valid get_attr types: {_allowed_getattr_types()}"
259
+ )
260
+
261
+
262
+ elif node.op == "placeholder":
263
+ _check_val(node)
264
+ # TODO(zhxchen17)
265
+ # elif node.op == "output":
266
+ # _check_flattened_outputs()
267
+
268
+ self.check_additional(gm)
269
+
270
+
271
+ class TrainingIRVerifier(Verifier):
272
+ dialect = "TRAINING"
273
+
274
+
275
+ def _verify_exported_program_module_call_graph(exported_program) -> None:
276
+ module_call_graph = exported_program.module_call_graph
277
+ nodes = {
278
+ node.name for node in exported_program.graph.nodes
279
+ }
280
+ for entry in module_call_graph:
281
+ if entry.signature is not None:
282
+ for arg in entry.signature.inputs:
283
+ if arg.name and arg.name not in nodes:
284
+ raise SpecViolationError(
285
+ f"Input {arg.name} does not exist in the graph."
286
+ )
287
+ for arg in entry.signature.outputs:
288
+ if arg.name and arg.name not in nodes:
289
+ raise SpecViolationError(
290
+ f"Output {arg.name} does not exist in the graph."
291
+ )
292
+
293
+
294
+ def _verify_exported_program_signature(exported_program) -> None:
295
+ # Check ExportedProgram signature matches
296
+ gs = exported_program.graph_signature
297
+
298
+ # Check every node in the signature exists in the graph
299
+ input_node_names = [node.name for node in exported_program.graph.nodes if node.op == "placeholder"]
300
+
301
+ if len(input_node_names) != len(gs.input_specs):
302
+ raise SpecViolationError(
303
+ f"Number of graph inputs ({len(input_node_names)}) "
304
+ f"does not match number of inputs in the graph signature ({len(gs.input_specs)})"
305
+ )
306
+
307
+ for input_spec, node in zip(gs.input_specs, input_node_names):
308
+ if isinstance(input_spec.arg, (TensorArgument, SymIntArgument)):
309
+ if input_spec.arg.name != node:
310
+ raise SpecViolationError(
311
+ f"Input spec name {input_spec.arg.name} does not match node name {node}"
312
+ )
313
+
314
+ if input_spec.kind == InputKind.USER_INPUT:
315
+ continue
316
+
317
+ elif input_spec.kind == InputKind.PARAMETER:
318
+ if not isinstance(input_spec.arg, TensorArgument):
319
+ raise SpecViolationError(
320
+ f"Parameter {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
321
+ )
322
+ if input_spec.target is None:
323
+ raise SpecViolationError(
324
+ f"InputSpec for {input_spec.name} has no target."
325
+ )
326
+
327
+ param = input_spec.target
328
+ if param not in exported_program.state_dict:
329
+ raise SpecViolationError(
330
+ f"Parameter {param} is not in the state dict."
331
+ )
332
+
333
+ if not isinstance(exported_program.state_dict[param], torch.nn.Parameter):
334
+ raise SpecViolationError(
335
+ f"State dict entry for parameter {param} is not an instance of torch.nn.Parameter."
336
+ )
337
+
338
+ elif input_spec.kind == InputKind.BUFFER:
339
+ if not isinstance(input_spec.arg, TensorArgument):
340
+ raise SpecViolationError(
341
+ f"Buffer {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
342
+ )
343
+ if input_spec.target is None:
344
+ raise SpecViolationError(
345
+ f"InputSpec for {input_spec.name} has no target."
346
+ )
347
+
348
+ buffer = input_spec.target
349
+ if input_spec.persistent is None:
350
+ raise SpecViolationError(
351
+ f"Buffer {buffer} is missing a persistence flag"
352
+ )
353
+
354
+ if input_spec.persistent is True and buffer not in exported_program.state_dict:
355
+ raise SpecViolationError(
356
+ f"Buffer {buffer} is not in the state dict."
357
+ )
358
+
359
+ if input_spec.persistent is False and buffer in exported_program.state_dict:
360
+ raise SpecViolationError(
361
+ f"Non-persistent buffer {buffer} is in the state dict, it should not be."
362
+ )
363
+ elif input_spec.kind == InputKind.CONSTANT_TENSOR:
364
+ if not isinstance(input_spec.arg, TensorArgument):
365
+ raise SpecViolationError(
366
+ f"Constant tensor {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
367
+ )
368
+ if input_spec.target is None:
369
+ raise SpecViolationError(
370
+ f"InputSpec for {input_spec.name} has no target."
371
+ )
372
+
373
+ tensor_const = input_spec.target
374
+ if tensor_const not in exported_program.constants:
375
+ raise SpecViolationError(
376
+ f"Constant tensor {tensor_const} is not in the constants dictionary."
377
+ )
378
+ elif input_spec.kind == InputKind.CUSTOM_OBJ:
379
+ if not isinstance(input_spec.arg, CustomObjArgument):
380
+ raise SpecViolationError(
381
+ f"Custom object {input_spec.name} is not a custom object argument. Found {input_spec.arg} instead."
382
+ )
383
+ if input_spec.target is None:
384
+ raise SpecViolationError(
385
+ f"InputSpec for {input_spec.name} has no target."
386
+ )
387
+
388
+ custom_obj = input_spec.target
389
+ if custom_obj not in exported_program.constants:
390
+ raise SpecViolationError(
391
+ f"Custom object {custom_obj} is not in the constants dictionary."
392
+ )
393
+ elif input_spec.kind == InputKind.TOKEN:
394
+ if not isinstance(input_spec.arg, TokenArgument):
395
+ raise SpecViolationError(
396
+ f"Constant tensor {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
397
+ )
398
+ else:
399
+ raise SpecViolationError(
400
+ f"Unknown InputKind {input_spec.kind}."
401
+ )
402
+
403
+ # Check outputs
404
+ output_node = list(exported_program.graph.nodes)[-1]
405
+ assert output_node.op == "output"
406
+ output_nodes = [
407
+ arg.name if isinstance(arg, torch.fx.Node) else arg
408
+ for arg in output_node.args[0]
409
+ ]
410
+
411
+ if len(output_nodes) != len(gs.output_specs):
412
+ raise SpecViolationError(
413
+ f"Number of output nodes {len(output_nodes)} is different "
414
+ "Than the number of outputs specified by the graph signature: \n"
415
+ f"Number of mutated buffers: {len(gs.buffers_to_mutate)}. \n"
416
+ f"Number of user outputs: {len(gs.user_outputs)}. \n"
417
+ )
418
+
419
+ num_tokens = len(gs.output_tokens)
420
+ end = len(gs.buffers_to_mutate) + len(gs.user_inputs_to_mutate) + num_tokens
421
+ mutate_nodes: List[str] = output_nodes[num_tokens:end]
422
+ user_output_nodes = output_nodes[end:end + len(gs.user_outputs)]
423
+
424
+ for mutation_node in mutate_nodes:
425
+ if mutation_node in gs.buffers_to_mutate:
426
+ if gs.buffers_to_mutate[mutation_node] not in gs.buffers:
427
+ raise SpecViolationError(
428
+ f"Buffer output {mutation_node} does not point to a buffer that exists. \n"
429
+ f"Dict of buffers that are mutated, in order: {gs.buffers_to_mutate} \n"
430
+ f"Buffer nodes available: {gs.buffers} \n"
431
+ )
432
+ elif mutation_node in gs.user_inputs_to_mutate:
433
+ if gs.user_inputs_to_mutate[mutation_node] not in gs.user_inputs:
434
+ raise SpecViolationError(
435
+ f"User input output {mutation_node} does not point to a user input that exists. \n"
436
+ f"Dict of user inputs that are mutated, in order: {gs.user_inputs_to_mutate} \n"
437
+ f"User input nodes available: {gs.user_inputs} \n")
438
+ else:
439
+ raise SpecViolationError(
440
+ f"Mutation node {mutation_node} is neither a buffer nor a user input. "
441
+ f"Buffers to mutate: {gs.buffers_to_mutate}, User inputs to mutate: {gs.user_inputs_to_mutate}"
442
+ )
443
+
444
+ for user_output_node, user_output_name in zip(user_output_nodes, gs.user_outputs):
445
+ if user_output_node != user_output_name:
446
+ raise SpecViolationError(
447
+ f"User output {user_output_node} is not in the correct "
448
+ "order or is not found in the "
449
+ f"exported program's user_output list: {gs.user_outputs}. "
450
+ )
451
+
452
+
453
+ def load_verifier(dialect: str) -> Type[Verifier]:
454
+ if dialect == "ATEN" or dialect == "":
455
+ return _VerifierMeta._registry.get(dialect, Verifier)
456
+ return _VerifierMeta._registry[dialect]
.venv/lib/python3.11/site-packages/torch/_export/wrappers.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ from contextlib import contextmanager
3
+
4
+ import torch
5
+ import torch._custom_ops
6
+ from torch._C import DispatchKey
7
+ from torch._higher_order_ops.strict_mode import strict_mode
8
+ from torch._higher_order_ops.utils import autograd_not_implemented
9
+ from torch._ops import HigherOrderOperator
10
+ from torch._subclasses.fake_tensor import FakeTensorMode
11
+ from torch.fx.experimental.proxy_tensor import ProxyTorchDispatchMode, track_tensor_tree
12
+ from torch.utils import _pytree as pytree
13
+
14
+
15
class ExportTracepoint(HigherOrderOperator):
    """Marker higher-order op used by export to tag module-call boundaries."""

    def __init__(self):
        super().__init__("_export_tracepoint")

    def __call__(self, *args, **kwargs):
        # Pure pass-through; behavior comes from the registered py_impls.
        return super().__call__(*args, **kwargs)


_export_tracepoint = ExportTracepoint()
24
+
25
+
26
@_export_tracepoint.py_impl(ProxyTorchDispatchMode)
def export_tracepoint_dispatch_mode(mode, *args, **kwargs):
    """Record the tracepoint into the proxy graph; inputs pass through."""
    proxy_args, proxy_kwargs = pytree.tree_map(
        mode.tracer.unwrap_proxy, (args, kwargs)
    )
    node_proxy = mode.tracer.create_proxy(
        "call_function", _export_tracepoint, proxy_args, proxy_kwargs
    )
    return track_tensor_tree(args, node_proxy, constant=None, tracer=mode.tracer)
33
+
34
+
35
@_export_tracepoint.py_impl(FakeTensorMode)
def export_tracepoint_fake_tensor_mode(mode, *args, **kwargs):
    """Under fake tensors the tracepoint is an identity on its inputs."""
    with mode:
        return args
39
+
40
+
41
@_export_tracepoint.py_functionalize_impl
def export_tracepoint_functional(ctx, *args, **kwargs):
    """Functionalization rule: unwrap tensors, re-dispatch, re-wrap results."""
    raw_args = ctx.unwrap_tensors(args)
    raw_kwargs = ctx.unwrap_tensors(kwargs)

    with ctx.redispatch_to_next():
        result = _export_tracepoint(*raw_args, **raw_kwargs)
        return ctx.wrap_tensors(result)
49
+
50
+
51
# No autograd formula exists for the tracepoint; defer the error until a
# gradient is actually required.
_export_tracepoint.py_impl(DispatchKey.Autograd)(
    autograd_not_implemented(_export_tracepoint, deferred_error=True)
)


@_export_tracepoint.py_impl(DispatchKey.CPU)
def export_tracepoint_cpu(*args, **kwargs):
    """Eager CPU kernel: the tracepoint is a no-op identity."""
    return args
59
+
60
+
61
def _wrap_submodule(mod, path, module_call_specs):
    """Install pre/post forward hooks on the submodule at dotted *path*.

    The hooks flatten each call's inputs/outputs, route them through
    ``_export_tracepoint`` so export can observe module-call boundaries, and
    record the pytree in/out specs into *module_call_specs*.

    Returns the pair of hook handles so the caller can remove them later.
    """
    assert isinstance(mod, torch.nn.Module)
    assert path != ""

    target = mod
    for part in path.split("."):
        if not hasattr(target, part):
            raise RuntimeError(f"Couldn't find submodule at path {path}")
        target = getattr(target, part)

    def record_specs(path, in_spec, out_spec):
        # Repeated calls through the same path must agree with what was
        # recorded before.
        if path in module_call_specs:
            assert module_call_specs[path]["in_spec"] == in_spec
            assert module_call_specs[path]["out_spec"] == out_spec
        module_call_specs[path] = {"in_spec": in_spec, "out_spec": out_spec}

    def ensure_supported(flat_values):
        for value in flat_values:
            if value is None or isinstance(
                value, (torch.Tensor, str, int, float, bool)
            ):
                continue
            raise AssertionError(
                f"Only Tensors or scalars are supported as pytree flattened inputs, got: {value}"
            )

    def pre_hook(module, args, kwargs):
        flat_inputs, in_spec = pytree.tree_flatten((args, kwargs))
        ensure_supported(flat_inputs)
        flat_inputs = _export_tracepoint(
            *flat_inputs, kind="module_call_inputs", path=path
        )
        return pytree.tree_unflatten(flat_inputs, in_spec)

    def post_hook(module, args, kwargs, res):
        _, in_spec = pytree.tree_flatten((args, kwargs))
        flat_outputs, out_spec = pytree.tree_flatten(res)
        ensure_supported(flat_outputs)
        flat_outputs = _export_tracepoint(
            *flat_outputs, kind="module_call_outputs", path=path
        )
        record_specs(path, in_spec, out_spec)
        return pytree.tree_unflatten(flat_outputs, out_spec)

    pre_handle = target.register_forward_pre_hook(pre_hook, with_kwargs=True)
    post_handle = target.register_forward_hook(post_hook, with_kwargs=True)
    return pre_handle, post_handle
101
+
102
+
103
@contextmanager
def _wrap_submodules(f, preserve_signature, module_call_signatures):
    """Hook every path in *preserve_signature* on module *f* for the body.

    All installed hook handles are removed on exit, even if the body raises.
    """
    installed = []

    try:
        for submodule_path in preserve_signature:
            installed.extend(
                _wrap_submodule(f, submodule_path, module_call_signatures)
            )
        yield
    finally:
        for h in installed:
            h.remove()
114
+
115
+
116
def _mark_strict_experimental(cls):
    """Class decorator that routes ``__call__`` through the strict_mode HOP."""

    def _strict_call(self, *args):
        return strict_mode(self, args)

    cls.__call__ = _strict_call
    return cls
.venv/lib/python3.11/site-packages/torch/_lazy/__init__.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+
3
+ import torch._C._lazy
4
+ from torch.utils._pytree import tree_flatten, tree_unflatten
5
+
6
+ from .closure import add_step_closure, run_step_closures
7
+
8
+
9
def mark_step(device: str = "", wait=False):
    """Trigger a mark step.

    This amounts to:
    - collecting a group of 'live' lazy tensors to index into the
      compilation cache (lowering/compiling their IR graphs if not cached)
    - kicking off execution of the compiled function
    - (optionally, ``wait=True``) waiting for cpu-side execution to complete
      (does not sync the accelerator)
    """
    # TODO(whc) expand this to include backend hooks and align with XLA backend needs
    torch._C._lazy._mark_step(device, [], wait=wait)
    run_step_closures()


def wait_device_ops(devices=None):
    """Wait for all the async operations on the given devices to complete.

    Args:
        devices (string..., optional): The devices whose async ops need to be
            waited for. If empty, all the local devices will be waited for.
    """
    torch._C._lazy._wait_device_ops(devices=devices if devices is not None else [])


def sync_multi(tensors, devices):
    """Sync the list of lazy tensors so their IR gets lowered for the active
    backend and the compiled computation graph gets cached."""
    torch._C._lazy._sync_multi(tensors, devices)


def get_tensor_id(tensor):
    """Return a unique id of the lazy tensor maintained by LTC."""
    return torch._C._lazy._get_tensor_id(tensor)


def to_cpu(tensors, devices=None):
    """Sync *tensors*, then return an equal-structured pytree of CPU copies."""
    target_devices = devices or ["lazy"]

    flat, spec = tree_flatten(tensors)
    sync_multi(flat, target_devices)
    return tree_unflatten([t.to("cpu") for t in flat], spec)


def save(tensors, *args, **kwargs):
    """``torch.save`` after moving the (possibly lazy) tensors to CPU."""
    torch.save(to_cpu(tensors), *args, **kwargs)
.venv/lib/python3.11/site-packages/torch/_lazy/__pycache__/ir_cache.cpython-311.pyc ADDED
Binary file (859 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/_lazy/__pycache__/ts_backend.cpython-311.pyc ADDED
Binary file (522 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/_lazy/computation.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch._C._lazy
3
+ import torch._C._lazy_ts_backend
4
+
5
+
6
def get_tensors_ts_device_data_node(tensors):
    """Return tensor ids and eager tensors for DeviceData nodes in the IR
    for the passed in lazy tensors.

    TODO: This API is currently ts backend specific. We are working on
    generalizing it to all backends including XLA.
    """
    return torch._C._lazy_ts_backend._get_tensors_ts_device_data_node(tensors)


def get_graph_hash(tensors):
    """Return the graph hash for the passed in lazy tensors."""
    return torch._C._lazy._get_graph_hash(tensors)


def run_cached_graph(hash_str, graph_inputs):
    """Run the cached computation graph identified by *hash_str* with the
    given inputs.

    TODO: This API is currently ts backend specific. We are working on
    generalizing it to all backends including XLA.
    """
    return torch._C._lazy_ts_backend._run_cached_graph(hash_str, graph_inputs)
.venv/lib/python3.11/site-packages/torch/_lazy/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch._C._lazy
3
+
4
+
5
def get_force_fallback():
    """Return the config used to force LTC fallback."""
    return torch._C._lazy._get_force_fallback()


def set_force_fallback(configval):
    """Set the config used to force LTC fallback."""
    torch._C._lazy._set_force_fallback(configval)


def set_reuse_ir(val: bool):
    """Set the config to reuse IR nodes for faster tracing."""
    torch._C._lazy._set_reuse_ir(val)
.venv/lib/python3.11/site-packages/torch/_lazy/debug.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch._C._lazy
3
+
4
+
5
def render_ir_graph(tensors):
    """Return a text dump of the LTC IR graph in dot format for the tensors.

    The text can be processed by tools like dot to be rendered in pdf, png,
    etc.
    """
    return torch._C._lazy._get_tensors_dot(tensors)


def dump_ir(tensors, ir_format):
    """Return a dump of *tensors* in the specified format.

    Valid formats:
      - "text":    LTC IR
      - "backend": the active backend's IR
    """
    if ir_format == "text":
        return torch._C._lazy._get_tensors_text(tensors)
    if ir_format == "backend":
        return torch._C._lazy._get_tensors_backend(tensors)
    raise RuntimeError(f"Unrecognized IR format: {ir_format}")
.venv/lib/python3.11/site-packages/torch/_lazy/device_context.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import threading
3
+ from typing import Any, Dict
4
+
5
+ import torch._C._lazy
6
+
7
+
8
class DeviceContext:
    # Process-wide cache of per-device contexts, guarded by a lock.
    _CONTEXTS: Dict[str, Any] = {}
    _CONTEXTS_LOCK = threading.Lock()

    def __init__(self, device):
        self.device = device


def get_device_context(device=None):
    """Return (creating on first use) the cached DeviceContext for *device*.

    When *device* is None the default lazy device type is used.
    """
    key = (
        torch._C._lazy._get_default_device_type()
        if device is None
        else str(device)
    )
    with DeviceContext._CONTEXTS_LOCK:
        ctx = DeviceContext._CONTEXTS.get(key)
        if ctx is None:
            ctx = DeviceContext(key)
            DeviceContext._CONTEXTS[key] = ctx
        return ctx
.venv/lib/python3.11/site-packages/torch/_lazy/extract_compiled_graph.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import copy
3
+ import dataclasses
4
+ import itertools
5
+ import os
6
+ from typing import Any, Callable, Dict, List
7
+
8
+ import torch
9
+ import torch._lazy as lazy
10
+ import torch._lazy.metrics as metrics
11
+ from torch import fx
12
+ from torch._lazy import computation, debug as lazy_debug
13
+ from torch._lazy.tensor_factory_functions import tensor_factory_functions
14
+
15
+
16
+ debug = os.environ.get("debug_extract_compiled_graph") is not None
17
+
18
+
19
@dataclasses.dataclass
class GraphInputMatcher:
    """Build the real graph inputs for calls made after lazy tracing.

    Graph inputs whose tensor id appears in ``tensor_id_to_arg_idx`` are
    method parameters and must be replaced with the current call's
    arguments; the remaining ones are most likely const tensors captured at
    trace time, whose values are replayed from ``graph_input_ivalues``.

    ``graph_input_tensor_ids`` and ``graph_input_ivalues`` list the
    tensor_id and ivalue for each of the TS/XLA graph inputs.
    """

    tensor_id_to_arg_idx: Dict[int, int]
    graph_input_tensor_ids: List[int]
    graph_input_ivalues: List[Any]

    def __call__(self, args):
        # Produce the real graph input tensors for this call.
        lookup = self.tensor_id_to_arg_idx
        return [
            traced if (idx := lookup.get(tid)) is None else args[idx]
            for tid, traced in zip(
                self.graph_input_tensor_ids, self.graph_input_ivalues
            )
        ]
53
+
54
+
55
class ReturnValueHandler:
    r"""Re-duplicate graph outputs that LTC deduplicated.

    When ``_ltc_sync_multi`` is called on multiple tensors, the compiled
    graph contains an output only for *unique* tensors — if a tensor appears
    several times in the input, only its first occurrence matters.  Python
    callers, however, still expect the duplicates back, e.g. for:

        def forward(self, a):
            return a, a

    the TS graph captured by LTC returns a single tensor while the Python
    method expects two.  This class deduplicates the lazy tensors first,
    recording for each unique tensor the positions it occupied, so the eager
    tensors can be fanned back out later.
    """

    def __init__(self, lazy_out_list):
        self.index: List[List[int]] = []
        self.total_count = len(lazy_out_list)

        seen: Dict[int, int] = {}
        for position, tensor in enumerate(lazy_out_list):
            slot = seen.get(id(tensor))
            if slot is None:
                seen[id(tensor)] = len(self.index)
                self.index.append([position])
            else:
                self.index[slot].append(position)

    def duplicate_eager_tensors(self, eager_tensor_list):
        """Fan the deduped eager results back out to their original slots."""
        assert len(eager_tensor_list) == len(self.index)
        out = [None] * self.total_count
        for tensor, positions in zip(eager_tensor_list, self.index):
            for position in positions:
                out[position] = tensor
        return out
95
+
96
+
97
def force_lazy_device(model: fx.GraphModule):
    """Rewrite eager device references in *model* to the lazy device.

    Factory methods in an Fx graph may create tensors for a specific eager
    device.  If we take no action, those eager tensors will be mixed with
    lazy tensors and cause a crash, so every explicit device argument is
    overwritten and, for known factory functions without a device argument,
    a lazy one is injected.
    """

    def to_lazy(value):
        if isinstance(value, torch.device):
            return torch.device("lazy", index=value.index)
        return value

    def has_device_arg(args, kwargs):
        return any(
            isinstance(a, torch.device)
            for a in itertools.chain(args, kwargs.values())
        )

    for node in model.graph.nodes:
        node.args = tuple(to_lazy(a) for a in node.args)
        node.kwargs = {k: to_lazy(v) for k, v in node.kwargs.items()}

        # For torchbench models like yolov3 and hf_Bart, dynamo generates Fx
        # graphs that return eager tensors on the default device
        # (check https://gist.github.com/shunting314/eabdf6c769c59bc384469717b8f9bb7f for yolove,
        # and https://gist.github.com/shunting314/8d5e2d9348a3258959d3954186c48814 for hf_Bart).
        # Since there is no explicit device argument to override, for the
        # covered tensor factory methods we add a lazy device argument
        # explicitly.
        #
        # TODO: This solution is not ideal since we may miss some factory
        # methods. In the future, when we support lazy mode, this method can
        # be replaced by that.
        if node.target in tensor_factory_functions and not has_device_arg(
            node.args, node.kwargs
        ):
            new_kwargs = dict(node.kwargs)  # node.kwargs is immutable; copy.
            new_kwargs["device"] = torch.device("lazy")
            node.kwargs = new_kwargs

    model.recompile()
138
+
139
+
140
def get_fallback_ops():
    """Return "name=count" strings for each aten:: op whose fallback counter
    is positive."""
    return [
        f"{name}={count}"
        for name in metrics.counter_names()
        if "aten::" in name and (count := int(metrics.counter_value(name))) > 0
    ]
150
+
151
+
152
def extract_compiled_graph(model: fx.GraphModule, example_inputs) -> Callable:
    """Optimize an eager model with LTC and return a wrapper that executes
    the compiled graph directly without retracing.

    The wrapper depends on other mechanisms, like TorchDynamo guards, to
    guarantee it is only called when that is safe.

    Raises:
        RuntimeError: if any op fell back to eager during lazy tracing,
            since the cached graph would then not cover the computation.
    """
    lazy_args = [arg.to(device="lazy") for arg in example_inputs]
    args_tensor_ids = [lazy.get_tensor_id(lazy_arg) for lazy_arg in lazy_args]
    tensor_id_to_arg_idx = {tensor_id: i for i, tensor_id in enumerate(args_tensor_ids)}
    lazy_model = copy.deepcopy(model).to(device=torch.device("lazy"))
    force_lazy_device(lazy_model)

    # This call executes lazy tracing and enables extracting the compiled
    # graph later.  Counters are reset around it so fallbacks during this
    # trace are detected reliably.
    metrics.reset()
    lazy_out = lazy_model(*lazy_args)
    fallback_ops = get_fallback_ops()
    metrics.reset()

    if len(fallback_ops) > 0:
        # BUGFIX: message previously read "Fail to extact".
        raise RuntimeError(
            f"Failed to extract the compiled graph because of fallback: {','.join(fallback_ops)}"
        )

    if not isinstance(lazy_out, (tuple, list)):
        lazy_out = (lazy_out,)

    # Inputs are included so in-place argument updates show up as outputs.
    args_and_out = tuple(lazy_args) + tuple(lazy_out)
    return_value_handler = ReturnValueHandler(args_and_out)
    if debug:
        print("Fx code:\n", model.code)
        print("LTC IR:", lazy_debug.dump_ir(args_and_out, "text"))

    # TODO: this part is TS backend specific for now and will be generalized
    # to support XLA.
    (
        graph_input_tensor_ids,
        graph_input_ivalues,
    ) = computation.get_tensors_ts_device_data_node(args_and_out)
    assert len(graph_input_tensor_ids) == len(graph_input_ivalues)
    graph_input_matcher = GraphInputMatcher(
        tensor_id_to_arg_idx, graph_input_tensor_ids, graph_input_ivalues
    )

    graph_hash = computation.get_graph_hash(args_and_out)

    if debug:
        print("graph_hash", graph_hash)
        print(f"args_tensor_ids {args_tensor_ids}")
        print("tensor ids from device data:", graph_input_tensor_ids)

    # Sync the list of output tensors so the computation graph for these
    # tensors will be cached. Those computation graphs can be retrieved by
    # graph hash later.
    lazy.sync_multi(args_and_out, [])

    def optimized_mod(*args):
        if len(args_and_out) == 0:
            return ()
        graph_input = graph_input_matcher(args)
        res = return_value_handler.duplicate_eager_tensors(
            computation.run_cached_graph(graph_hash, graph_input)
        )

        assert len(res) == len(args_and_out)
        for i, arg in enumerate(args):
            # only copy those tensors that get inplace updated
            if arg is not res[i]:
                arg.copy_(res[i])

        # skip the args
        return res[len(args) :]

    return optimized_mod
.venv/lib/python3.11/site-packages/torch/_lazy/metrics.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch._C._lazy
3
+
4
+
5
def reset():
    """Reset all metric counters."""
    torch._C._lazy._reset_metrics()


def counter_names():
    """Retrieve all the currently active counter names."""
    return torch._C._lazy._counter_names()


def counter_value(name: str):
    """Return the value of the counter with the specified name."""
    return torch._C._lazy._counter_value(name)


def metrics_report():
    """Return the combined (lazy core and backend) metric report."""
    return torch._C._lazy._metrics_report()
.venv/lib/python3.11/site-packages/torch/_lazy/ts_backend.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch._C._lazy_ts_backend
3
+
4
+
5
def init():
    """Initialize the lazy TorchScript backend."""
    torch._C._lazy_ts_backend._init()
.venv/lib/python3.11/site-packages/torch/multiprocessing/_atfork.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import sys
3
+
4
+
5
+ __all__ = ["register_after_fork"]
6
+
7
if sys.platform == "win32":
    import multiprocessing.util as _util

    def _register(func):
        # multiprocessing's after-fork hook passes the registering object as
        # an argument; drop it before invoking the user callback.
        def wrapper(arg):
            func()

        _util.register_after_fork(_register, wrapper)

else:
    import os

    def _register(func):
        os.register_at_fork(after_in_child=func)


def register_after_fork(func):
    """Register a callable to be executed in the child process after a fork.

    Note:
        In python < 3.7 this will only work with processes created using the
        ``multiprocessing`` module. In python >= 3.7 it also works with
        ``os.fork()``.

    Args:
        func (function): Function taking no arguments to be called in the child after fork

    """
    _register(func)
.venv/lib/python3.11/site-packages/torch/multiprocessing/pool.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import multiprocessing.pool
2
+ import multiprocessing.util as util
3
+
4
+ from .queue import SimpleQueue
5
+
6
+
7
def clean_worker(*args, **kwargs):
    """Run the stock multiprocessing pool worker, then force a GC pass.

    Regular multiprocessing workers don't fully clean up after themselves,
    so garbage collection is triggered explicitly to make sure all
    destructors are called.
    """
    import gc

    multiprocessing.pool.worker(*args, **kwargs)
    gc.collect()


class Pool(multiprocessing.pool.Pool):
    """Pool implementation which uses our version of SimpleQueue.

    This lets us pass tensors in shared memory across processes instead of
    serializing the underlying data.
    """

    def _setup_queues(self):
        self._inqueue = SimpleQueue()
        self._outqueue = SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def _repopulate_pool(self):
        """Bring the number of pool processes up to the configured count.

        Used after reaping workers which have exited.
        """
        for _ in range(self._processes - len(self._pool)):
            # changed worker -> clean_worker
            worker_args = (
                self._inqueue,
                self._outqueue,
                self._initializer,
                self._initargs,
                self._maxtasksperchild,
            )
            if hasattr(self, "_wrap_exception"):
                worker_args += (self._wrap_exception,)
            proc = self.Process(target=clean_worker, args=worker_args)
            self._pool.append(proc)
            proc.name = proc.name.replace("Process", "PoolWorker")
            proc.daemon = True
            proc.start()
            util.debug("added worker")
.venv/lib/python3.11/site-packages/torch/multiprocessing/queue.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import io
3
+ import multiprocessing.queues
4
+ import pickle
5
+ from multiprocessing.reduction import ForkingPickler
6
+
7
+
8
class ConnectionWrapper:
    """Proxy class for _multiprocessing.Connection which uses ForkingPickler for object serialization."""

    def __init__(self, conn):
        self.conn = conn

    def send(self, obj):
        # Serialize with ForkingPickler so tensor storages are shared rather
        # than copied.
        buffer = io.BytesIO()
        ForkingPickler(buffer, pickle.HIGHEST_PROTOCOL).dump(obj)
        self.send_bytes(buffer.getvalue())

    def recv(self):
        return pickle.loads(self.recv_bytes())

    def __getattr__(self, name):
        # Everything else falls through to the wrapped connection.
        if "conn" in self.__dict__:
            return getattr(self.conn, name)
        raise AttributeError(f"'{type(self).__name__}' object has no attribute 'conn'")


class Queue(multiprocessing.queues.Queue):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._reader: ConnectionWrapper = ConnectionWrapper(self._reader)
        self._writer: ConnectionWrapper = ConnectionWrapper(self._writer)
        self._send = self._writer.send
        self._recv = self._reader.recv


class SimpleQueue(multiprocessing.queues.SimpleQueue):
    def _make_methods(self):
        if not isinstance(self._reader, ConnectionWrapper):
            self._reader: ConnectionWrapper = ConnectionWrapper(self._reader)
            self._writer: ConnectionWrapper = ConnectionWrapper(self._writer)
        super()._make_methods()  # type: ignore[misc]
.venv/lib/python3.11/site-packages/torch/multiprocessing/reductions.py ADDED
@@ -0,0 +1,647 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import multiprocessing
3
+ import os
4
+ import threading
5
+ from multiprocessing.reduction import ForkingPickler
6
+ from multiprocessing.util import register_after_fork
7
+ from typing import Union
8
+
9
+ import torch
10
+ from torch._namedtensor_internals import check_serializing_named_tensor
11
+
12
+
13
+ try:
14
+ # Early load resource_sharer to prevent a partially initialized instance
15
+ # from being inherited in a forked child process. The reduce_storage method
16
+ # requires this module indirectly through DupFd(). The built-in mp.Queue
17
+ # class pickles arguments in a background thread which may overlap with the
18
+ # fork.
19
+ import multiprocessing.resource_sharer
20
+ except ImportError:
21
+ pass
22
+
23
+
24
class StorageWeakRef:
    r"""A weak reference to a Storage.

    The cdata member is a Python number containing the integer representation
    of the Storage pointer.
    """

    __slots__ = ["cdata", "_free_weak_ref"]

    def __init__(self, storage):
        self.cdata = storage._weak_ref()
        # Save a direct reference to _free_weak_ref because the `torch` module
        # might be cleared during Python shutdown before this module is cleared.
        self._free_weak_ref = torch.Storage._free_weak_ref  # type: ignore[attr-defined]

    @classmethod
    def from_weakref(cls, cdata):
        # Alternate constructor from an already-obtained weak-ref handle.
        ref = cls.__new__(cls)
        ref.cdata = cdata
        ref._free_weak_ref = torch.Storage._free_weak_ref  # type: ignore[attr-defined]
        return ref

    def expired(self):
        return torch.Storage._expired(self.cdata)  # type: ignore[attr-defined]

    def __del__(self):
        self._free_weak_ref(self.cdata)

    def __hash__(self):
        return self.cdata

    def __eq__(self, other):
        # Identity fast-path, otherwise compare the underlying pointers.
        return id(self) == id(other) or self.cdata == other.cdata
59
+
60
+
61
class SharedCache(dict):
    """Dictionary from multiprocessing handles to StorageWeakRef."""

    def __init__(self) -> None:
        # free_dead_references() runs when len exceeds the current limit;
        # the limit rescales with the number of remaining live objects.
        self.limit = 128
        # `fork` inherits lock state, so in case we fork when the lock is
        # held, we register a function to reset the lock to a new object to
        # avoid possible deadlocks, following python multiprocessing library
        # design.
        self._after_fork()
        register_after_fork(self, SharedCache._after_fork)

    def _after_fork(self):
        self.lock = threading.Lock()

    def get(self, key):
        with self.lock:
            return dict.get(self, key)

    def __setitem__(self, key, storage_ref):
        with self.lock:
            dict.__setitem__(self, key, storage_ref)
            if len(self) > self.limit:
                self.free_dead_references()

    def free_dead_references(self):
        # Drop entries whose storages are gone, then rescale the limit to
        # twice the number of still-live entries (never below 128).
        alive = 0
        for key, ref in list(self.items()):
            if ref.expired():
                del self[key]
            else:
                alive += 1
        self.limit = max(128, alive * 2)


# mapping from handles to StorageWeakRef objects
shared_cache = SharedCache()
99
+
100
+
101
def rebuild_event(device, handle):
    """Reconstruct a CUDA event from its IPC handle."""
    return torch.cuda.Event.from_ipc_handle(device, handle)


def reduce_event(event):
    """Pickle a CUDA event as (rebuild_fn, (device, ipc_handle))."""
    return (rebuild_event, (event.device, event.ipc_handle()))
108
+
109
+
110
def rebuild_tensor(cls, storage, metadata):
    """Reconstruct a tensor of type *cls* from *storage* plus view metadata.

    *metadata* is ``(storage_offset, size, stride, requires_grad)``.
    """
    storage_offset, size, stride, requires_grad = metadata
    t = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
    if cls != torch.nn.parameter.Parameter:
        t.requires_grad = requires_grad
        return t
    # requires_grad must go through the constructor, rather than being set
    # as an attribute later, because it's an important check for Integer
    # Tensors to have requires_grad=False (or else they raise an error).
    return torch.nn.parameter.Parameter(t, requires_grad=requires_grad)
121
+
122
+
123
def rebuild_meta_tensor(
    tensor_cls,
    tensor_size,
    tensor_stride,
    tensor_offset,
    dtype,
    storage_size_bytes,
    requires_grad,
):
    """Reconstruct a meta-device tensor from its serialized description."""
    storage = torch.TypedStorage(
        wrap_storage=torch.UntypedStorage(storage_size_bytes, device="meta"),
        dtype=dtype,
        _internal=True,
    )

    t = torch._utils._rebuild_tensor(
        storage,
        tensor_offset,
        tensor_size,
        tensor_stride,
    )

    if tensor_cls == torch.nn.parameter.Parameter:
        # It is crucial for integer tensors to receive requires_grad=False
        # as an argument in the constructor.
        return torch.nn.parameter.Parameter(t, requires_grad=requires_grad)
    t.requires_grad = requires_grad
    return t
153
+
154
+
155
def rebuild_cuda_tensor(
    tensor_cls,
    tensor_size,
    tensor_stride,
    tensor_offset,
    storage_cls,
    dtype,
    storage_device,
    storage_handle,
    storage_size_bytes,
    storage_offset_bytes,
    requires_grad,
    ref_counter_handle,
    ref_counter_offset,
    event_handle,
    event_sync_required,
):
    """Reconstruct a CUDA tensor on the consumer side of a CUDA IPC transfer."""
    if storage_handle is None or storage_size_bytes == 0:
        # storage points to nullptr: make an empty storage of the right type.
        storage = storage_cls(0, dtype=dtype, device=storage_device, _internal=True)
    else:
        cache_key = (storage_handle, storage_offset_bytes)
        storage = storage_from_cache(storage_cls, cache_key)
        if storage is None:
            torch.cuda._lazy_init()
            storage = storage_cls._new_shared_cuda(
                storage_device,
                storage_handle,
                storage_size_bytes,
                storage_offset_bytes,
                ref_counter_handle,
                ref_counter_offset,
                event_handle,
                event_sync_required,
            )
            shared_cache[cache_key] = StorageWeakRef(storage)
        else:
            # We are already ref-counting this Storage, but the producer
            # needs its new ref-counters to be released.
            storage_cls._release_ipc_counter(
                ref_counter_handle, ref_counter_offset, device=storage_device
            )

    untyped = (
        storage
        if isinstance(storage, torch.UntypedStorage)
        else storage._untyped_storage
    )

    t = torch._utils._rebuild_tensor(
        torch.storage.TypedStorage(wrap_storage=untyped, dtype=dtype, _internal=True),
        tensor_offset,
        tensor_size,
        tensor_stride,
    )

    if tensor_cls == torch.nn.parameter.Parameter:
        # It is crucial for integer tensors to receive requires_grad=False
        # as an argument in the constructor.
        t = torch.nn.parameter.Parameter(t, requires_grad=requires_grad)
    else:
        t.requires_grad = requires_grad

    return t
221
+
222
+
223
def reduce_tensor(tensor):
    """ForkingPickler reducer for tensors.

    Dispatches on the tensor's kind: non-subclass nested tensors and sparse
    tensors are reduced component-wise; CUDA tensors are shared via CUDA IPC
    handles; meta tensors are rebuilt from metadata alone; everything else
    falls back to sharing the underlying (typed) storage.

    Raises:
        RuntimeError: if ``tensor`` is a non-leaf tensor that requires grad,
            since autograd graphs cannot cross process boundaries.
    """
    if tensor.requires_grad and not tensor.is_leaf:
        raise RuntimeError(
            "Cowardly refusing to serialize non-leaf tensor which requires_grad, "
            "since autograd does not support crossing process boundaries. "
            "If you just want to transfer the data, call detach() on the tensor "
            "before serializing (e.g., putting it on the queue)."
        )

    check_serializing_named_tensor(tensor)
    torch.utils.hooks.warn_if_has_hooks(tensor)

    # Note [CUDA IPC and the caching allocator]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # When you send a CUDA tensor over IPC, you might expect that you will
    # get out the same storage from the other end. However, the CUDA caching
    # allocator makes it difficult to preserve this invariant. Consider
    # the following situation: a tensor of size 0x100 points to offset 0x20 of
    # a storage at 0xA100 of size 0x100. (For simplicity, all of these
    # sizes are given in bytes). HOWEVER, with the caching allocator, this storage
    # might be part of a larger cudaMalloc allocation 0xA000 of size 0x4000.
    #
    # When we want to send this CUDA tensor over IPC, we must send the
    # *entire* cudaMalloc allocation, i.e., the 0xA000 region, not just
    # the storage 0xA100 (because that is what CUDA supports). So, on the
    # other end, there simply isn't any way to say, "Wait, you gave me
    # a bigger region (0xA000) than the one I wanted (0xA100)".
    #
    # OK, so if you sent the cudaMalloc allocation, can you just wrap that up as
    # one storage itself? No, because this cudaMalloc allocation might contain
    # storages of mixed types: float, bytes, double... If you make the entire
    # allocation a single storage of a type A, we'll hit an error when constructing
    # a tensor of type B on the storage.
    #
    # cudaIpcMemHandle is an identifier to access the sender cudaMalloc allocation on the
    # receiver side. However, cudaIpcMemHandles from each device in a given process may
    # only be opened by one context per device per other process.
    # If we open and close a memory handle multiple times in a process, CUDA is allowed
    # to give it a different address; similarly, once we close the memory, we're not
    # allowed to access it (and the storage/tensor built on top of it), even if it is
    # still live in the original process. As we cannot make a cudaMalloc allocation
    # to a single storage in one go, this requires us to cache the device pointer for
    # each cudaIpcMemHandle on C++ side to reconstruct types of storages, while keeping
    # the old ones alive.
    # See [https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html]
    #
    # This is fine, because all we need to do is to save our position in the allocation,
    # and reconstruct storage and tensor from it.
    # 0xA000 ->  -------CUDA Allocation------
    #           |                            |
    #           |                            |
    #           |                            |
    #           |                            |
    # 0xA100 ->  --------storage1 begin------
    #           |                            |
    # 0xA120 ->  --------tensor1 begin ------
    #           |                            |
    #           |                            |
    #           |                            |
    #           |                            |
    #           |                            |
    # 0xA160 ->  --------tensor1 end---------
    #           |                            |
    #           |                            |
    #           |                            |
    # 0xA200 ->  --------storage1 end--------
    #           |                            |
    # 0xE000 ->  --------CUDA allocation-----
    #
    # To send tensor1, the following info are required from sender to receiver for
    # storage reconstruction.
    #   1. cudaIpcMemHandle of 0xA000 (which can be mapped to a basePtr in receiver
    #      process). basePtr may not be exactly 0xA000 since it's a different process.
    #   2. offset (0xA100) of storage1 in the CUDA allocation.
    #   3. size of storage1 (0x100).
    #
    # On receiver side:
    #   1. Get the devPtr of the MemHandle to access the memory, reconstruct a storage
    #      of the same type using (basePtr, offset, size).
    #   2. we can reconstruct the tensor on top of the reconstructed storage
    #      Tensor(size=0x040, offset=0x020, storage=Storage(data=basePtr+0xA100, size=0x0100))
    #
    # This strategy has a few implications:
    #
    # 1. When we serialize a CUDA tensor for IPC, we cannot do it all in one
    #    go (non-compositionally), and this requires to have a global map
    #    memHandle -> devPtr for each process.
    #
    # 2. We MUST NOT let the new IPC tensor be resizable. Originally, a resize
    #    of the storage beyond 0x100 would merely have caused us to do a
    #    reallocation. You don't really want to do this, but if you did,
    #    all that would happen is that you would lose IPC sharing. But if
    #    you do this in the new world, we will happily let you write out of
    #    bounds of your "allocation", clobbering unrelated data in the cached
    #    allocator block. BAD!
    #
    # By the way, in old versions of PyTorch, we supported this situation
    # natively using a "storage view", which permitted multiple storages to be
    # views on each other. But this was the *only* use of storage views, so we
    # eliminated it so that we could just use tensor views to implement the same
    # thing.
    #

    # TODO: Handle distinguishing between subclass and non-subclass versions of NT better
    # https://github.com/pytorch/pytorch/issues/110543
    from torch.nested._internal.nested_tensor import NestedTensor

    if tensor.is_nested and not isinstance(tensor, NestedTensor):
        return reduce_nested_tensor(tensor)

    if tensor.layout in {
        torch.sparse_coo,
        torch.sparse_csr,
        torch.sparse_bsr,
        torch.sparse_csc,
        torch.sparse_bsc,
    }:
        return reduce_sparse_tensor(tensor)

    storage = tensor._typed_storage()

    if storage._untyped_storage.device.type == "cuda":
        (
            device,
            handle,
            storage_size_bytes,
            storage_offset_bytes,
            ref_counter_handle,
            ref_counter_offset,
            event_handle,
            event_sync_required,
        ) = storage._share_cuda_()
        tensor_offset = tensor.storage_offset()
        shared_cache[handle] = StorageWeakRef(storage)
        # _backward_hooks purposely omitted here, see
        # Note [Don't serialize hooks]
        return (
            rebuild_cuda_tensor,
            (
                type(tensor),
                tensor.size(),
                tensor.stride(),
                tensor_offset,  # tensor offset in its storage
                type(storage),
                tensor.dtype,
                device,
                handle,  # identifier which CUDA allocation is the storage in.
                storage_size_bytes,  # size(in bytes) of the storage
                storage_offset_bytes,  # offset(in bytes) of the storage in the CUDA allocation
                tensor.requires_grad,
                ref_counter_handle,
                ref_counter_offset,
                event_handle,
                event_sync_required,
            ),
        )
    elif storage._untyped_storage.device.type == "meta":
        # Meta tensors have no data; metadata alone suffices to rebuild them.
        return (
            rebuild_meta_tensor,
            (
                type(tensor),
                tensor.size(),
                tensor.stride(),
                tensor.storage_offset(),
                tensor.dtype,
                tensor.untyped_storage().size(),
                tensor.requires_grad,
            ),
        )

    # _backward_hooks purposely omitted here, see Note [Don't serialize hooks]
    metadata = (
        tensor.storage_offset(),
        tensor.size(),
        tensor.stride(),
        tensor.requires_grad,
    )
    return (rebuild_tensor, (type(tensor), storage, metadata))
401
+
402
+
403
+ def rebuild_nested_tensor(
404
+ rebuild_buffer_func,
405
+ rebuild_buffer_args,
406
+ rebuild_sizes_func,
407
+ rebuild_sizes_args,
408
+ rebuild_strides_func,
409
+ rebuild_strides_args,
410
+ rebuild_offsets_func,
411
+ rebuild_offsets_args,
412
+ ):
413
+ buffer = rebuild_buffer_func(*rebuild_buffer_args)
414
+ sizes = rebuild_sizes_func(*rebuild_sizes_args)
415
+ strides = rebuild_strides_func(*rebuild_strides_args)
416
+ offsets = rebuild_offsets_func(*rebuild_offsets_args)
417
+ return torch._nested_view_from_buffer_copy(buffer, sizes, strides, offsets)
418
+
419
+
420
def reduce_nested_tensor(nt):
    """Pickle support for C++ (non-subclass) nested tensors.

    The flat buffer and the three metadata tensors are each reduced through
    the ordinary tensor path so they can use shared memory independently;
    ``rebuild_nested_tensor`` recombines them on the receiving side.
    """
    components = (
        nt.values(),
        nt._nested_tensor_size(),
        nt._nested_tensor_strides(),
        nt._nested_tensor_storage_offsets(),
    )
    rebuild_args = []
    for component in components:
        fn, args = reduce_tensor(component)
        rebuild_args.append(fn)
        rebuild_args.append(args)
    return (rebuild_nested_tensor, tuple(rebuild_args))
443
+
444
+
445
+ def rebuild_sparse_coo_tensor(
446
+ rebuild_indices_func,
447
+ rebuild_indices_args,
448
+ rebuild_values_func,
449
+ rebuild_values_args,
450
+ shape,
451
+ is_coalesced,
452
+ ):
453
+ indices = rebuild_indices_func(*rebuild_indices_args)
454
+ values = rebuild_values_func(*rebuild_values_args)
455
+ return torch.sparse_coo_tensor(indices, values, shape, is_coalesced=is_coalesced)
456
+
457
+
458
def rebuild_sparse_compressed_tensor(
    rebuild_compressed_indices_func,
    rebuild_compressed_indices_args,
    rebuild_plain_indices_func,
    rebuild_plain_indices_args,
    rebuild_values_func,
    rebuild_values_args,
    shape,
    layout,
):
    """Recreate a sparse CSR/CSC/BSR/BSC tensor from rebuilt index/value parts."""
    return torch.sparse_compressed_tensor(
        rebuild_compressed_indices_func(*rebuild_compressed_indices_args),
        rebuild_plain_indices_func(*rebuild_plain_indices_args),
        rebuild_values_func(*rebuild_values_args),
        shape,
        layout=layout,
    )
476
+
477
+
478
def reduce_sparse_tensor(sparse):
    """Pickle support for sparse tensors.

    Reduces the constituent dense tensors (indices and values) separately so
    each can travel through the ordinary tensor-sharing path, then pairs them
    with the matching rebuild function.
    """
    if sparse.layout is torch.sparse_coo:
        indices_fn, indices_args = reduce_tensor(sparse._indices())
        values_fn, values_args = reduce_tensor(sparse._values())
        return (
            rebuild_sparse_coo_tensor,
            (
                indices_fn,
                indices_args,
                values_fn,
                values_args,
                sparse.shape,
                sparse.is_coalesced(),
            ),
        )

    # Compressed layouts: pick the compressed/plain index accessors per layout.
    if sparse.layout in (torch.sparse_csr, torch.sparse_bsr):
        compressed, plain = sparse.crow_indices(), sparse.col_indices()
    elif sparse.layout in (torch.sparse_csc, torch.sparse_bsc):
        compressed, plain = sparse.ccol_indices(), sparse.row_indices()
    else:
        raise NotImplementedError(sparse.layout)

    compressed_fn, compressed_args = reduce_tensor(compressed)
    plain_fn, plain_args = reduce_tensor(plain)
    values_fn, values_args = reduce_tensor(sparse.values())
    return (
        rebuild_sparse_compressed_tensor,
        (
            compressed_fn,
            compressed_args,
            plain_fn,
            plain_args,
            values_fn,
            values_args,
            sparse.shape,
            sparse.layout,
        ),
    )
523
+
524
+
525
def fd_id(fd):
    """Return an (inode, device) pair that uniquely identifies an open file.

    On macOS this does not work for shared memory handles, which is why the
    "file_descriptor" sharing strategy is unsupported on that platform.
    """
    info = os.fstat(fd)
    return (info.st_ino, info.st_dev)
531
+
532
+
533
def storage_from_cache(cls, key):
    """Look up a previously shared storage by cache key.

    Returns an ``UntypedStorage`` revived from the weak reference in
    ``shared_cache``, or ``None`` on a miss (or if the entry has expired).
    """
    ref = shared_cache.get(key)
    return None if ref is None else torch.UntypedStorage._new_with_weak_ptr(ref.cdata)
538
+
539
+
540
def rebuild_storage_fd(cls, df, size):
    """Materialize a CPU storage shared through a duplicated file descriptor."""
    fd = df.detach()
    try:
        cached = storage_from_cache(cls, fd_id(fd))
        if cached is not None:
            return cached
        fresh = cls._new_shared_fd_cpu(fd, size)
        shared_cache[fd_id(fd)] = StorageWeakRef(fresh)
        return fresh
    finally:
        # The dup'd descriptor is only needed while mapping; always close it.
        os.close(fd)
551
+
552
+
553
def rebuild_storage_filename(cls, manager, handle, size, dtype=None):
    """Materialize a CPU storage shared via the file_system strategy.

    ``size`` is in bytes when ``dtype`` is None (untyped storage) and in
    elements otherwise (legacy typed storage).
    """
    cached = storage_from_cache(cls, handle)
    if cached is not None:
        return cached._shared_decref()

    if dtype is None:
        fresh = torch.UntypedStorage._new_shared_filename_cpu(manager, handle, size)
    else:
        nbytes = size * torch._utils._element_size(dtype)
        fresh = torch.TypedStorage(
            wrap_storage=torch.UntypedStorage._new_shared_filename_cpu(
                manager, handle, nbytes
            ),
            dtype=dtype,
            _internal=True,
        )
    shared_cache[handle] = StorageWeakRef(fresh)
    return fresh._shared_decref()
571
+
572
+
573
def rebuild_storage_empty(cls):
    """Recreate a zero-sized storage (size-0 storages cannot be mmapped)."""
    return cls()
575
+
576
+
577
def rebuild_typed_storage(storage, dtype):
    """Wrap an untyped storage back into a ``TypedStorage`` of ``dtype``."""
    return torch.storage.TypedStorage(
        wrap_storage=storage, dtype=dtype, _internal=True
    )
579
+
580
+
581
# Reducer for torch.storage.TypedStorage itself (not its legacy subclasses).
def reduce_typed_storage(storage):
    """Pickle a ``TypedStorage`` as its untyped payload plus its dtype."""
    payload = storage._untyped_storage
    return (rebuild_typed_storage, (payload, storage.dtype))
584
+
585
+
586
def rebuild_typed_storage_child(storage, storage_type):
    """Wrap an untyped storage into a ``TypedStorage`` subclass (e.g. FloatStorage)."""
    return storage_type(wrap_storage=storage, _internal=True)
588
+
589
+
590
# Reducer for legacy TypedStorage subclasses such as torch.FloatStorage:
# the subclass itself encodes the dtype, so only the payload travels.
def reduce_typed_storage_child(storage):
    """Pickle a typed-storage subclass as its untyped payload plus its class."""
    payload = storage._untyped_storage
    return (rebuild_typed_storage_child, (payload, type(storage)))
593
+
594
+
595
def reduce_storage(storage):
    """ForkingPickler reducer for CPU storages.

    CUDA and meta storages must be pickled as tensors instead (their sharing
    metadata lives on the tensor path).  CPU storages are shared either via
    the file_system strategy (named shared memory) or by duplicating a file
    descriptor, depending on the configured sharing strategy.

    Raises:
        RuntimeError: for CUDA or meta storages.
    """
    from . import get_sharing_strategy

    if storage.is_cuda:
        raise RuntimeError(
            "Cannot pickle CUDA storage; try pickling a CUDA tensor instead"
        )
    elif storage.device.type == "meta":
        raise RuntimeError(
            "Cannot pickle meta storage; try pickling a meta tensor instead"
        )
    elif get_sharing_strategy() == "file_system":
        metadata = storage._share_filename_cpu_()
        # The second element of the shared-filename metadata is the handle;
        # it keys the shared cache.
        cache_key = metadata[1]
        rebuild = rebuild_storage_filename
        if isinstance(storage, torch.TypedStorage):
            metadata += (storage.dtype,)
        storage._shared_incref()
    elif storage.size() == 0:
        # This is special cased because Empty tensors
        # (with size 0) cannot be mmapped.
        return (rebuild_storage_empty, (type(storage),))
    else:
        fd, size = storage._share_fd_cpu_()
        df = multiprocessing.reduction.DupFd(fd)
        cache_key = fd_id(fd)
        metadata = (df, size)
        rebuild = rebuild_storage_fd  # type: ignore[assignment]

    shared_cache[cache_key] = StorageWeakRef(storage)
    return (rebuild, (type(storage),) + metadata)
626
+
627
+
628
def init_reductions():
    """Register this module's reducers with multiprocessing's ForkingPickler.

    After this runs, tensors, storages, Parameters and CUDA events sent
    through multiprocessing use the sharing paths defined in this module
    instead of plain byte-copy pickling.
    """
    ForkingPickler.register(torch.cuda.Event, reduce_event)

    for t in torch._storage_classes:
        if t.__name__ == "UntypedStorage":
            ForkingPickler.register(t, reduce_storage)
        else:
            # Legacy typed storages (FloatStorage, ...) share their untyped payload.
            ForkingPickler.register(t, reduce_typed_storage_child)

    ForkingPickler.register(torch.storage.TypedStorage, reduce_typed_storage)

    for t in torch._tensor_classes:
        ForkingPickler.register(t, reduce_tensor)

    # TODO: Maybe this should be in tensor_classes? :)
    ForkingPickler.register(torch.Tensor, reduce_tensor)

    from torch.nn.parameter import Parameter

    ForkingPickler.register(Parameter, reduce_tensor)
.venv/lib/python3.11/site-packages/torch/multiprocessing/spawn.py ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import logging
3
+ import multiprocessing
4
+ import multiprocessing.connection
5
+ import os
6
+ import pickle
7
+ import signal
8
+ import sys
9
+ import tempfile
10
+ import time
11
+ import warnings
12
+ from concurrent.futures import as_completed, ThreadPoolExecutor
13
+ from typing import Optional
14
+
15
+ from . import _prctl_pr_set_pdeathsig # type: ignore[attr-defined]
16
+
17
+
18
+ ENV_VAR_PARALLEL_START = "TORCH_MP_PARALLEL_START"
19
+
20
+ log = logging.getLogger(__name__)
21
+
22
+ __all__ = [
23
+ "ProcessContext",
24
+ "ProcessException",
25
+ "ProcessExitedException",
26
+ "ProcessRaisedException",
27
+ "spawn",
28
+ "SpawnContext",
29
+ "start_processes",
30
+ ]
31
+
32
+
33
class ProcessException(Exception):
    """Base class for errors reported for a child process by this module.

    Carries the index of the failing process within the spawned group and
    its OS pid alongside the human-readable message.
    """

    # NOTE(review): __slots__ lists "error_pid" but __init__ stores self.pid;
    # instances still work because Exception provides a __dict__ — confirm
    # before relying on __slots__ here.
    __slots__ = ["error_index", "error_pid"]

    def __init__(self, msg: str, error_index: int, pid: int):
        super().__init__(msg)
        self.msg = msg
        self.error_index = error_index
        self.pid = pid

    def __reduce__(self):
        # Rebuild with the same constructor args when unpickled.
        return type(self), (self.msg, self.error_index, self.pid)
44
+
45
+
46
class ProcessRaisedException(ProcessException):
    """Raised when a child process fails with a Python exception.

    The child's formatted traceback is embedded in ``msg``.
    """

    def __init__(self, msg: str, error_index: int, error_pid: int):
        super().__init__(msg, error_index, error_pid)
56
+
57
+
58
class ProcessExitedException(ProcessException):
    """Exception raised when a process failed due to signal or exited with a specific code."""

    __slots__ = ["exit_code"]

    def __init__(
        self,
        msg: str,
        error_index: int,
        error_pid: int,
        exit_code: int,
        # Present only when the process died from a signal.
        signal_name: Optional[str] = None,
    ):
        super().__init__(msg, error_index, error_pid)
        self.exit_code = exit_code
        self.signal_name = signal_name

    def __reduce__(self):
        # Rebuild with the same constructor args when unpickled.
        return (
            type(self),
            (self.msg, self.error_index, self.pid, self.exit_code, self.signal_name),
        )
80
+
81
+
82
def _wrap(fn, i, args, error_file):
    """Child-process entry point: run ``fn(i, *args)``.

    On an uncaught exception, the formatted traceback is pickled to
    ``error_file`` (so the parent can re-raise it) and the child exits
    with status 1.  A KeyboardInterrupt is treated as a parent-initiated
    shutdown and ignored.
    """
    # prctl(2) is a Linux specific system call.
    # On other systems the following function call has no effect.
    # This is set to ensure that non-daemonic child processes can
    # terminate if their parent terminates before they do.
    _prctl_pr_set_pdeathsig(signal.SIGINT)

    try:
        fn(i, *args)
    except KeyboardInterrupt:
        pass  # SIGINT; Killed by parent, do nothing
    except Exception:
        # Propagate exception to parent process, keeping original traceback
        import traceback

        with open(error_file, "wb") as fh:
            pickle.dump(traceback.format_exc(), fh)
        sys.exit(1)
100
+
101
+
102
class ProcessContext:
    """Tracks a group of spawned worker processes and their error files.

    Returned by ``start_processes``/``spawn`` (when ``join=False``) so the
    caller can join the workers and surface child failures as exceptions.
    """

    def __init__(self, processes, error_files):
        # error_files[i] is the path where process i writes its pickled
        # traceback on failure (see _wrap); the file exists only on crash.
        self.error_files = error_files
        self.processes = processes
        # Map each process sentinel to its index so connection.wait results
        # can be traced back to a specific worker.
        self.sentinels = {
            process.sentinel: index for index, process in enumerate(processes)
        }

    def pids(self):
        """Return the OS pids of all managed processes, in spawn order."""
        return [int(process.pid) for process in self.processes]

    def join(self, timeout=None):
        r"""Join one or more processes within spawn context.

        Attempt to join one or more processes in this spawn context.
        If one of them exited with a non-zero exit status, this function
        kills the remaining processes and raises an exception with the cause
        of the first process exiting.

        Returns ``True`` if all processes have been joined successfully,
        ``False`` if there are more processes that need to be joined.

        Args:
            timeout (float): Wait this long before giving up on waiting.
        """
        # Ensure this function can be called even when we're done.
        if len(self.sentinels) == 0:
            return True

        # Wait for any process to fail or all of them to succeed.
        ready = multiprocessing.connection.wait(
            self.sentinels.keys(),
            timeout=timeout,
        )

        error_index = None
        for sentinel in ready:
            index = self.sentinels.pop(sentinel)
            process = self.processes[index]
            process.join()
            if process.exitcode != 0:
                error_index = index
                break

        # Return if there was no error.
        if error_index is None:
            # Return whether or not all processes have been joined.
            return len(self.sentinels) == 0

        # Assume failure. Terminate processes that are still alive.
        # Try SIGTERM then SIGKILL if the process isn't going down.
        # The reason is related to python signal handling is limited
        # to main thread and if that is in c/c++ land and stuck it won't
        # to handle it. We have seen processes getting stuck not handling
        # SIGTERM for the above reason.
        # NOTE(review): this rebinding shadows the `timeout` parameter for the
        # rest of the method — intentional here since the parameter was only
        # used for the wait above.
        timeout: int = 30
        for process in self.processes:
            if process.is_alive():
                log.warning("Terminating process %s via signal SIGTERM", process.pid)
                process.terminate()
        end = time.monotonic() + timeout
        for process in self.processes:
            # Share the 30s grace period across all processes.
            time_to_wait = max(0, end - time.monotonic())
            process.join(time_to_wait)
        for process in self.processes:
            if process.is_alive():
                log.warning(
                    "Unable to shutdown process %s via SIGTERM , forcefully exiting via SIGKILL",
                    process.pid,
                )
                process.kill()
            process.join()

        # The file will only be created if the process crashed.
        failed_process = self.processes[error_index]
        if not os.access(self.error_files[error_index], os.R_OK):
            # No traceback file: the child died from a signal or exited
            # non-zero without raising a Python exception.
            exitcode = self.processes[error_index].exitcode
            if exitcode < 0:
                try:
                    name = signal.Signals(-exitcode).name
                except ValueError:
                    name = f"<Unknown signal {-exitcode}>"
                raise ProcessExitedException(
                    "process %d terminated with signal %s" % (error_index, name),
                    error_index=error_index,
                    error_pid=failed_process.pid,
                    exit_code=exitcode,
                    signal_name=name,
                )
            else:
                raise ProcessExitedException(
                    "process %d terminated with exit code %d" % (error_index, exitcode),
                    error_index=error_index,
                    error_pid=failed_process.pid,
                    exit_code=exitcode,
                )

        with open(self.error_files[error_index], "rb") as fh:
            original_trace = pickle.load(fh)
        msg = "\n\n-- Process %d terminated with the following error:\n" % error_index
        msg += original_trace
        raise ProcessRaisedException(msg, error_index, failed_process.pid)
204
+
205
+
206
class SpawnContext(ProcessContext):
    """Deprecated alias of :class:`ProcessContext`, kept for backward compatibility."""

    def __init__(self, processes, error_files):
        warnings.warn("SpawnContext is renamed to ProcessContext since 1.4 release.")
        super().__init__(processes, error_files)
210
+
211
+
212
# Note: [start_processes]
# mp.start_processes handles both start_method='spawn' and 'fork'. It's supposed to be a
# more generalized API than mp.spawn. Currently we only document mp.spawn as it's the
# CUDA compatible start_method. However, in environments like Ipython notebooks, 'fork'
# works better than 'spawn'. Every helper function we created for mp.spawn is indeed
# general enough, and backends like XLA can reuse them in Colab notebooks as well.
# Currently we only add this API first, we can consider adding it to documentation as
# needed in the future.
def start_processes(
    fn,
    args=(),
    nprocs=1,
    join=True,
    daemon=False,
    start_method="spawn",
):
    """Start ``nprocs`` processes running ``fn(i, *args)`` using ``start_method``.

    Args:
        fn: callable invoked as ``fn(i, *args)`` in each child, where ``i``
            is the process index.
        args: extra positional arguments forwarded to ``fn``.
        nprocs: number of processes to start.
        join: if True, block until all processes finish (raising on the first
            failure); if False, return a :class:`ProcessContext` immediately.
        daemon: daemon flag for the spawned processes.
        start_method: multiprocessing start method ('spawn', 'fork', ...).

    Returns:
        ``None`` when ``join`` is True, otherwise a :class:`ProcessContext`.
    """
    # To speed up performance in certain cases (see https://github.com/pytorch/pytorch/issues/133010),
    # this func will start processes in parallel if start_method is 'forkserver'.
    # Please opt in to this perf optimization by setting env var (TORCH_MP_PARALLEL_START) to 1.
    # todo: investigate why spawn does not work with threadpool and raises SIGINT
    if (
        start_method == "forkserver"
        and os.environ.get(ENV_VAR_PARALLEL_START, "0") == "1"
    ):
        log.info("Starting processes in parallel.")
        start_parallel = True
    else:
        # Set env var TORCH_MP_PARALLEL_START to 0 to disable parallel start
        start_parallel = False

    mp = multiprocessing.get_context(start_method)
    error_files = [None] * nprocs
    processes = [None] * nprocs

    def start_process(i):
        # Each process is assigned a file to write tracebacks to. We
        # use the file being non-empty to indicate an exception
        # occurred (vs an expected shutdown). Note: this previously
        # used a multiprocessing.Queue but that can be prone to
        # deadlocks, so we went with a simpler solution for a one-shot
        # message between processes.
        tf = tempfile.NamedTemporaryFile(
            prefix="pytorch-errorfile-", suffix=".pickle", delete=False
        )
        tf.close()
        # Unlink immediately; the child re-creates the file only on crash.
        os.unlink(tf.name)
        process = mp.Process(
            target=_wrap,
            args=(fn, i, args, tf.name),
            daemon=daemon,
        )
        process.start()
        return i, process, tf.name

    if not start_parallel:
        for i in range(nprocs):
            idx, process, tf_name = start_process(i)
            error_files[idx] = tf_name
            processes[idx] = process
    else:
        with ThreadPoolExecutor(max_workers=nprocs) as executor:
            futures = [executor.submit(start_process, i) for i in range(nprocs)]
            for fut in as_completed(futures):
                idx, process, tf_name = fut.result()
                # idx and process rank needs to be the same.
                error_files[idx] = tf_name
                processes[idx] = process
    context = ProcessContext(processes, error_files)
    if not join:
        return context

    # Loop on join until it returns True or raises an exception.
    while not context.join():
        pass
286
+
287
+
288
def spawn(fn, args=(), nprocs=1, join=True, daemon=False, start_method="spawn"):
    r"""Spawns ``nprocs`` processes that run ``fn`` with ``args``.

    If one of the processes exits with a non-zero exit status, the
    remaining processes are killed and an exception is raised with the
    cause of termination. In the case an exception was caught in the
    child process, it is forwarded and its traceback is included in
    the exception raised in the parent process.

    Args:
        fn (function): Function is called as the entrypoint of the
            spawned process. This function must be defined at the top
            level of a module so it can be pickled and spawned. This
            is a requirement imposed by multiprocessing.

            The function is called as ``fn(i, *args)``, where ``i`` is
            the process index and ``args`` is the passed through tuple
            of arguments.

        args (tuple): Arguments passed to ``fn``.
        nprocs (int): Number of processes to spawn.
        join (bool): Perform a blocking join on all processes.
        daemon (bool): The spawned processes' daemon flag. If set to True,
                       daemonic processes will be created.
        start_method (str): (deprecated) this method will always use ``spawn``
                            as the start method; passing anything else emits a
                            ``FutureWarning`` and is ignored. To use a
                            different start method use ``start_processes()``.

    Returns:
        None if ``join`` is ``True``,
        :class:`~ProcessContext` if ``join`` is ``False``

    """
    if start_method != "spawn":
        msg = (
            f"This method only supports start_method=spawn (got: {start_method}).\n"
            "To use a different start_method use:\n\t\t"
            " torch.multiprocessing.start_processes(...)"
        )
        warnings.warn(msg, FutureWarning, stacklevel=2)
    # Always force 'spawn' regardless of the (deprecated) argument.
    return start_processes(fn, args, nprocs, join, daemon, start_method="spawn")
.venv/lib/python3.11/site-packages/torch/nn/quantizable/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (246 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from torch.ao.nn.quantizable.modules.activation import MultiheadAttention
2
+ from torch.ao.nn.quantizable.modules.rnn import LSTM, LSTMCell
3
+
4
+
5
+ __all__ = [
6
+ "LSTM",
7
+ "LSTMCell",
8
+ "MultiheadAttention",
9
+ ]
.venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (454 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__pycache__/activation.cpython-311.pyc ADDED
Binary file (669 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/__pycache__/rnn.cpython-311.pyc ADDED
Binary file (665 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (253 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.37 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/modules/__pycache__/conv.cpython-311.pyc ADDED
Binary file (878 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/quantized/dynamic/modules/__pycache__/linear.cpython-311.pyc ADDED
Binary file (679 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__init__.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from . import parametrizations, rnn, stateless
2
+ from .clip_grad import clip_grad_norm, clip_grad_norm_, clip_grad_value_
3
+ from .convert_parameters import parameters_to_vector, vector_to_parameters
4
+ from .fusion import (
5
+ fuse_conv_bn_eval,
6
+ fuse_conv_bn_weights,
7
+ fuse_linear_bn_eval,
8
+ fuse_linear_bn_weights,
9
+ )
10
+ from .init import skip_init
11
+ from .memory_format import (
12
+ convert_conv2d_weight_memory_format,
13
+ convert_conv3d_weight_memory_format,
14
+ )
15
+ from .spectral_norm import remove_spectral_norm, spectral_norm
16
+ from .weight_norm import remove_weight_norm, weight_norm
17
+
18
+
19
+ __all__ = [
20
+ "clip_grad_norm",
21
+ "clip_grad_norm_",
22
+ "clip_grad_value_",
23
+ "convert_conv2d_weight_memory_format",
24
+ "convert_conv3d_weight_memory_format",
25
+ "fuse_conv_bn_eval",
26
+ "fuse_conv_bn_weights",
27
+ "fuse_linear_bn_eval",
28
+ "fuse_linear_bn_weights",
29
+ "parameters_to_vector",
30
+ "parametrizations",
31
+ "remove_spectral_norm",
32
+ "remove_weight_norm",
33
+ "rnn",
34
+ "skip_init",
35
+ "spectral_norm",
36
+ "stateless",
37
+ "vector_to_parameters",
38
+ "weight_norm",
39
+ ]
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (1.25 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/_deprecation_utils.cpython-311.pyc ADDED
Binary file (2.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/_named_member_accessor.cpython-311.pyc ADDED
Binary file (19.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/_per_sample_grad.cpython-311.pyc ADDED
Binary file (6.85 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/clip_grad.cpython-311.pyc ADDED
Binary file (9.91 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/convert_parameters.cpython-311.pyc ADDED
Binary file (3.61 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/fusion.cpython-311.pyc ADDED
Binary file (7.23 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/init.cpython-311.pyc ADDED
Binary file (2.75 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/memory_format.cpython-311.pyc ADDED
Binary file (8.39 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/parametrizations.cpython-311.pyc ADDED
Binary file (26.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/parametrize.cpython-311.pyc ADDED
Binary file (35.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/prune.cpython-311.pyc ADDED
Binary file (59.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/rnn.cpython-311.pyc ADDED
Binary file (28.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/spectral_norm.cpython-311.pyc ADDED
Binary file (17 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/stateless.cpython-311.pyc ADDED
Binary file (13.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/__pycache__/weight_norm.cpython-311.pyc ADDED
Binary file (8.14 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_deprecation_utils.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import importlib
3
+ import warnings
4
+ from typing import Callable, List
5
+
6
+
7
_MESSAGE_TEMPLATE = (
    r"Usage of '{old_location}' is deprecated; please use '{new_location}' instead."
)


def lazy_deprecated_import(
    all: List[str],
    old_module: str,
    new_module: str,
) -> Callable:
    r"""Build a module ``__getattr__`` that lazily forwards deprecated imports.

    Accessing any name listed in ``all`` on the old module emits a
    ``RuntimeWarning`` (so it is not silenced by default filters) and
    resolves the name from ``new_module`` instead.

    Args:
        all: The list of the functions that are imported. Generally, the
            module's __all__ list of the module.
        old_module: Old module location
        new_module: New module location / Migrated location

    Returns:
        Callable to assign to the ``__getattr__``

    Usage:

        # In the `torch/nn/quantized/functional.py`
        from torch.nn.utils._deprecation_utils import lazy_deprecated_import
        _MIGRATED_TO = "torch.ao.nn.quantized.functional"
        __getattr__ = lazy_deprecated_import(
            all=__all__,
            old_module=__name__,
            new_module=_MIGRATED_TO)
    """
    warning_message = _MESSAGE_TEMPLATE.format(
        old_location=old_module, new_location=new_module
    )

    def getattr_dunder(name):
        if name not in all:
            raise AttributeError(f"Module {new_module!r} has no attribute {name!r}.")
        # "RuntimeWarning" is used so the message is not ignored by default.
        warnings.warn(warning_message, RuntimeWarning)
        return getattr(importlib.import_module(new_module), name)

    return getattr_dunder