koichi12 commited on
Commit
b2f8f15
·
verified ·
1 Parent(s): d9bcc7f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .venv/lib/python3.11/site-packages/torch/_inductor/__init__.py +179 -0
  3. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/__init__.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/aoti_eager.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/async_compile.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/autotune_process.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/bounds.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comm_analysis.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comms.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/compile_fx.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/config.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/constant_folding.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpp_builder.cpython-311.pyc +0 -0
  14. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpu_vec_isa.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cudagraph_utils.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/debug.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/decomposition.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/dependencies.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/exc.cpython-311.pyc +0 -0
  20. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/extern_node_serializer.cpython-311.pyc +0 -0
  21. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/freezing.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/fx_utils.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/graph.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/hooks.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/index_propagation.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/inductor_prims.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/jagged_lowerings.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/loop_body.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/metrics.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_ir.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_lowerings.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/ops_handler.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/optimize_indexing.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/quantized_lowerings.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/remote_cache.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc +3 -0
  37. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/select_algorithm.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/sizevars.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/subgraph_lowering.cpython-311.pyc +0 -0
  40. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_case.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_operators.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/virtualized.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/wrapper_benchmark.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/torch/_inductor/aoti_eager.py +298 -0
  45. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/autoheuristic.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learned_heuristic_controller.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learnedheuristic_interface.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__init__.py +0 -0
  49. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingA100.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingH100.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -128,3 +128,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_
128
  .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_ops.so.9 filter=lfs diff=lfs merge=lfs -text
129
  .venv/lib/python3.11/site-packages/torch/_export/serde/__pycache__/serialize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
130
  .venv/lib/python3.11/site-packages/torch/nn/__pycache__/functional.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
128
  .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_ops.so.9 filter=lfs diff=lfs merge=lfs -text
129
  .venv/lib/python3.11/site-packages/torch/_export/serde/__pycache__/serialize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
130
  .venv/lib/python3.11/site-packages/torch/nn/__pycache__/functional.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
131
+ .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
.venv/lib/python3.11/site-packages/torch/_inductor/__init__.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ from typing import Any, Dict, List, Optional, Tuple
3
+
4
+ import torch.fx
5
+ import torch.utils._pytree as pytree
6
+
7
+
8
+ __all__ = ["compile", "list_mode_options", "list_options", "cudagraph_mark_step_begin"]
9
+
10
+
11
def compile(
    gm: torch.fx.GraphModule,
    example_inputs: List[torch.Tensor],
    options: Optional[Dict[str, Any]] = None,
):
    """
    Compile an FX graph with TorchInductor, bypassing TorchDynamo capture.

    Args:
        gm: The FX graph to compile.
        example_inputs: List of tensor inputs.
        options: Optional dict of config options.  See `torch._inductor.config`.

    Returns:
        A callable with the same behavior as ``gm`` but optimized by Inductor.
    """
    # Deferred import: keeps `import torch._inductor` cheap and avoids a
    # circular dependency on compile_fx.
    from .compile_fx import compile_fx

    compiled_fn = compile_fx(gm, example_inputs, config_patches=options)
    return compiled_fn
31
+
32
+
33
def aot_compile(
    gm: torch.fx.GraphModule,
    args: Tuple[Any],
    kwargs: Optional[Dict[str, Any]] = None,
    *,
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Ahead-of-time compile a given FX graph with TorchInductor into a shared library.

    Args:
        gm: The FX graph to compile.
        args: Example arguments
        kwargs: Example keyword arguments
        options: Optional dict of config options. See `torch._inductor.config`.

    Returns:
        Path to the generated shared library
    """
    # Deferred import to avoid circular imports at package load time.
    from .compile_fx import compile_fx_aot, graph_returns_tuple

    assert graph_returns_tuple(gm), (
        "Graph output must be a tuple(). This is so that we can avoid "
        "pytree processing of the outputs. Please change the module to "
        "have tuple outputs."
    )

    # We will serialize the pytree info into the .so as constant strings
    in_spec = None
    out_spec = None
    if isinstance(gm.graph._codegen, torch.fx.graph._PyTreeCodeGen):
        # The graph carries pytree-flattening codegen: swap it out for the
        # plain CodeGen (and recompile) so AOT compilation sees the flat
        # signature, but remember the original in/out specs for embedding.
        codegen = gm.graph._codegen
        gm.graph._codegen = torch.fx.graph.CodeGen()
        gm.recompile()

        if codegen.pytree_info.in_spec is not None:
            in_spec = codegen.pytree_info.in_spec
        if codegen.pytree_info.out_spec is not None:
            out_spec = codegen.pytree_info.out_spec

    else:
        # Graphs produced by export stash their specs as module attributes.
        if hasattr(gm, "_in_spec"):
            in_spec = gm._in_spec
        if hasattr(gm, "_out_spec"):
            out_spec = gm._out_spec

    # Empty string means "no spec available" to the consumer of the .so.
    serialized_in_spec = pytree.treespec_dumps(in_spec) if in_spec is not None else ""
    serialized_out_spec = (
        pytree.treespec_dumps(out_spec) if out_spec is not None else ""
    )

    flat_args_with_path, received_spec = pytree.tree_flatten_with_path(
        (args, kwargs or {})
    )

    # Replace non-tensor (constant) inputs with Nones, since these are not being
    # used anyways by the graph
    flat_example_inputs = [
        x[1] if isinstance(x[1], torch.Tensor) else None for x in flat_args_with_path
    ]

    # Guard against the caller flattening differently from the exported spec.
    if in_spec is not None and received_spec != in_spec:
        raise ValueError(  # noqa: B904
            "Trying to flatten user inputs with exported input tree spec: \n"
            f"{in_spec}\n"
            "but actually got inputs with tree spec of: \n"
            f"{received_spec}"
        )

    # Merge the serialized specs into the config patches; user-supplied
    # options are preserved, spec entries always win.
    options = (
        {
            "aot_inductor.serialized_in_spec": serialized_in_spec,
            "aot_inductor.serialized_out_spec": serialized_out_spec,
        }
        if options is None
        else {
            **options,
            "aot_inductor.serialized_in_spec": serialized_in_spec,
            "aot_inductor.serialized_out_spec": serialized_out_spec,
        }
    )

    return compile_fx_aot(
        gm,
        flat_example_inputs,  # type: ignore[arg-type]
        config_patches=options,
    )
120
+
121
+
122
def list_mode_options(
    mode: Optional[str] = None, dynamic: Optional[bool] = None
) -> Dict[str, Any]:
    r"""Returns a dictionary describing the optimizations that each of the available
    modes passed to `torch.compile()` performs.

    Args:
        mode (str, optional): The mode to return the optimizations for.
            If None, returns optimizations for all modes
        dynamic (bool, optional): Whether dynamic shape is enabled.

    Example::
        >>> torch._inductor.list_mode_options()
    """
    # Each entry maps a compile mode to the config patches it implies.
    modes: Dict[str, Dict[str, bool]] = {
        # no extra optimizations on top of the defaults
        "default": {},
        # cut kernel-launch overhead via CUDA graphs
        "reduce-overhead": {
            "triton.cudagraphs": True,
        },
        # exhaustive kernel autotuning, CUDA graphs disabled
        "max-autotune-no-cudagraphs": {
            "max_autotune": True,
        },
        # exhaustive kernel autotuning plus CUDA graphs
        "max-autotune": {
            "max_autotune": True,
            "triton.cudagraphs": True,
        },
    }
    if not mode:
        return modes  # type: ignore[return-value]
    return modes[mode]
155
+
156
+
157
+ def list_options() -> List[str]:
158
+ r"""Returns a dictionary describing the optimizations and debug configurations
159
+ that are available to `torch.compile()`.
160
+
161
+ The options are documented in `torch._inductor.config`.
162
+
163
+ Example::
164
+
165
+ >>> torch._inductor.list_options()
166
+ """
167
+
168
+ from torch._inductor import config
169
+
170
+ current_config: Dict[str, Any] = config.shallow_copy_dict()
171
+
172
+ return list(current_config.keys())
173
+
174
+
175
def cudagraph_mark_step_begin():
    """Indicates that a new iteration of inference or training is about to begin."""
    # Deferred import: cudagraph_trees is only needed when CUDA graphs are used.
    from .cudagraph_trees import mark_step_begin

    mark_step_begin()
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (6.4 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/aoti_eager.cpython-311.pyc ADDED
Binary file (16.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/async_compile.cpython-311.pyc ADDED
Binary file (15.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/autotune_process.cpython-311.pyc ADDED
Binary file (42.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/bounds.cpython-311.pyc ADDED
Binary file (8.59 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comm_analysis.cpython-311.pyc ADDED
Binary file (8.05 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comms.cpython-311.pyc ADDED
Binary file (28.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/compile_fx.cpython-311.pyc ADDED
Binary file (77.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/config.cpython-311.pyc ADDED
Binary file (28.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/constant_folding.cpython-311.pyc ADDED
Binary file (18.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpp_builder.cpython-311.pyc ADDED
Binary file (67.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpu_vec_isa.cpython-311.pyc ADDED
Binary file (16.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cudagraph_utils.cpython-311.pyc ADDED
Binary file (16.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/debug.cpython-311.pyc ADDED
Binary file (39.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/decomposition.cpython-311.pyc ADDED
Binary file (48.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/dependencies.cpython-311.pyc ADDED
Binary file (45.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/exc.cpython-311.pyc ADDED
Binary file (7.68 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/extern_node_serializer.cpython-311.pyc ADDED
Binary file (1.72 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/freezing.cpython-311.pyc ADDED
Binary file (16.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/fx_utils.cpython-311.pyc ADDED
Binary file (14 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/graph.cpython-311.pyc ADDED
Binary file (97.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/hooks.cpython-311.pyc ADDED
Binary file (1.31 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/index_propagation.cpython-311.pyc ADDED
Binary file (23.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/inductor_prims.cpython-311.pyc ADDED
Binary file (8.69 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/jagged_lowerings.cpython-311.pyc ADDED
Binary file (10.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/loop_body.cpython-311.pyc ADDED
Binary file (37.8 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/metrics.cpython-311.pyc ADDED
Binary file (17.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_ir.cpython-311.pyc ADDED
Binary file (62.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_lowerings.cpython-311.pyc ADDED
Binary file (41.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/ops_handler.cpython-311.pyc ADDED
Binary file (58.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/optimize_indexing.cpython-311.pyc ADDED
Binary file (4.82 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/quantized_lowerings.cpython-311.pyc ADDED
Binary file (4.29 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/remote_cache.cpython-311.pyc ADDED
Binary file (11.4 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48fc35b3ba35cd6f3ba02d218d951aa8a531c58ad217a2e94bfb14483e5a78af
3
+ size 216212
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/select_algorithm.cpython-311.pyc ADDED
Binary file (92.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/sizevars.cpython-311.pyc ADDED
Binary file (48.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/subgraph_lowering.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_case.cpython-311.pyc ADDED
Binary file (2.38 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_operators.cpython-311.pyc ADDED
Binary file (2.06 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/virtualized.cpython-311.pyc ADDED
Binary file (22 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/wrapper_benchmark.cpython-311.pyc ADDED
Binary file (15.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/aoti_eager.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Dict, List, Optional, Tuple
6
+ from unittest import mock
7
+
8
+ import torch
9
+ import torch._export
10
+ from torch._inductor.utils import is_cpu_device
11
+
12
+ from .runtime.runtime_utils import cache_dir
13
+
14
+
15
+ log = logging.getLogger(__name__)
16
+
17
+
18
+ def aoti_eager_cache_dir(namespace: str, device: str) -> Path:
19
+ return Path(cache_dir()) / "aoti_eager" / namespace / device
20
+
21
+
22
def aoti_eager_op_conf_lock(op_func_name_with_overload: str) -> Any:
    """Return an inter-process file lock guarding the JSON config of one op."""
    from filelock import FileLock

    # Avoid circular import
    from torch._inductor.codecache import get_lock_dir, LOCK_TIMEOUT

    lock_file_name = f"{op_func_name_with_overload}.lock"
    lock_path = os.path.join(get_lock_dir(), lock_file_name)
    return FileLock(lock_path, timeout=LOCK_TIMEOUT)
31
+
32
+
33
def load_aoti_eager_cache(
    ns: str, op_func_name_with_overload: str, device_type: str
) -> List[Optional[Dict[str, Any]]]:
    """Load and normalize the cached AOTI-eager kernel entries for one op.

    Reads ``<cache>/<ns>/<device_type>/<op>.json`` under the config file lock,
    resolves each entry's kernel path to an absolute posix path, and converts
    the string-serialized dtype/layout/memory-format metadata back into the
    corresponding torch objects.  Returns ``[]`` if the config or any kernel
    library is missing, or if anything goes wrong while loading.
    """
    device_kernel_cache = aoti_eager_cache_dir(ns, device_type)
    op_conf = device_kernel_cache / f"{op_func_name_with_overload}.json"
    if not op_conf.exists():
        return []

    try:
        # Lock so we never read the config while a writer is updating it.
        with aoti_eager_op_conf_lock(op_func_name_with_overload):
            with open(op_conf) as f:
                json_data = json.load(f)
                for item in json_data:
                    # Get absolute path for kernel library
                    kernel_lib_abs_path = device_kernel_cache / item["kernel_path"]
                    item["kernel_path"] = kernel_lib_abs_path.as_posix()

                    # Check if the kernel library exists; a single missing
                    # library invalidates the whole cache for this op.
                    if not kernel_lib_abs_path.exists():
                        return []

                    for metadata in item["meta_info"]:
                        if metadata.get("is_dynamic"):
                            raise NotImplementedError(
                                "Only support static shape for now"
                            )
                        if (
                            "device_type" in metadata
                            and metadata["device_type"] == "cpu"
                        ):
                            # CPU entries use device index -1 by convention.
                            metadata["device_index"] = -1
                        # Turn e.g. "torch.float32" back into torch.float32.
                        for dtype_key in ["dtype", "dtype_value"]:
                            if dtype_key in metadata:
                                metadata[dtype_key] = getattr(
                                    torch, metadata[dtype_key].split(".")[-1]
                                )
                        if "layout_value" in metadata:
                            metadata["layout_value"] = getattr(
                                torch, metadata["layout_value"].split(".")[-1]
                            )
                        if "memory_format_value" in metadata:
                            metadata["memory_format_value"] = getattr(
                                torch, metadata["memory_format_value"].split(".")[-1]
                            )

                return json_data
    except Exception as e:
        # Best-effort cache: log the failure and fall back to "no cache".
        err_msg = f"Failed to load aoti eager cache: {e}"
        log.exception(err_msg)
        return []
83
+
84
+
85
def supported_builtin_dtype_torch_dtype() -> Dict[type, torch.dtype]:
    """Map the builtin scalar types AOTI eager supports to their torch dtypes."""
    builtin_to_dtype: Dict[type, torch.dtype] = {
        int: torch.int32,
        float: torch.float,
        bool: torch.bool,
    }
    return builtin_to_dtype
87
+
88
+
89
def supported_scalar_types() -> Tuple[type, ...]:
    """Tuple of builtin scalar types accepted as AOTI eager op inputs."""
    # Iterating the mapping yields its keys, i.e. the supported types.
    return tuple(supported_builtin_dtype_torch_dtype())
92
+
93
+
94
+ def extract_tensor_metadata(dynamic: bool, input: torch.Tensor) -> Dict[str, Any]:
95
+ metadata: Dict[str, Any] = {}
96
+ metadata["is_dynamic"] = dynamic
97
+
98
+ assert isinstance(input, torch.Tensor)
99
+ metadata["device_type"] = f"{input.device.type}"
100
+ if is_cpu_device([input]):
101
+ metadata["device_index"] = -1
102
+ else:
103
+ metadata["device_index"] = input.device.index
104
+ metadata["dtype"] = f"{input.dtype}"
105
+ metadata["sizes"] = list(input.size())
106
+ metadata["strides"] = list(input.stride())
107
+ metadata["requires_grad"] = input.requires_grad
108
+ metadata["dispatch_key_set"] = torch._C._dispatch_keys(input).raw_repr()
109
+ return metadata
110
+
111
+
112
def extract_tensor_list_metadata(
    dynamic: bool,
    input: List[torch.Tensor],
) -> Dict[str, Any]:
    """Describe a list-of-tensors argument: one metadata dict per element,
    wrapped under the ``tensor_list`` key."""
    per_tensor = []
    for tensor in input:
        assert isinstance(tensor, torch.Tensor)
        per_tensor.append(extract_tensor_metadata(dynamic, tensor))

    return {"tensor_list": per_tensor}
124
+
125
+
126
def extract_scalar_metadata(device_type: str, input: Any) -> Dict[str, Any]:
    """Describe a builtin scalar (int/float/bool) argument for the cache."""
    assert isinstance(input, supported_scalar_types())
    builtin_to_dtype = supported_builtin_dtype_torch_dtype()
    return {
        "is_dynamic": False,
        # Scalar tensor: lives on the target device; CPU uses index -1.
        "device_type": device_type,
        "device_index": -1 if device_type == "cpu" else 0,
        "dtype": f"{builtin_to_dtype[type(input)]}",
        "scalar_value": input,
    }
137
+
138
+
139
def extract_string_metadata(input: str) -> Dict[str, Any]:
    """Describe a string argument for the AOTI eager kernel cache."""
    assert isinstance(input, str)
    return {"string_value": input}
144
+
145
+
146
def extract_dtype_metadata(input: torch.dtype) -> Dict[str, Any]:
    """Describe a torch.dtype argument for the AOTI eager kernel cache."""
    assert isinstance(input, torch.dtype)
    return {"dtype_value": f"{input}"}
151
+
152
+
153
def extract_device_metadata(input: torch.device) -> Dict[str, Any]:
    """Describe a torch.device argument for the AOTI eager kernel cache.

    ``device_index_value`` is ``None`` when the device has no explicit index.
    """
    assert isinstance(input, torch.device)
    return {
        "device_type_value": f"{input.type}",
        "device_index_value": input.index,
    }
159
+
160
+
161
def extract_layout_metadata(input: torch.layout) -> Dict[str, Any]:
    """Describe a torch.layout argument for the AOTI eager kernel cache."""
    assert isinstance(input, torch.layout)
    return {"layout_value": f"{input}"}
166
+
167
+
168
def aoti_compile_with_persistent_cache(
    ns: str,
    op_func_name_with_overload: str,
    device_type: str,
    dynamic: bool,
    f: Callable[..., Any],
    args: Tuple[Any],
    kwargs: Dict[str, Any],
    *,
    dynamic_shapes: Optional[Dict[str, Any]] = None,
    options: Optional[Dict[str, Any]] = None,
    remove_runtime_assertions: bool = False,
    disable_constraint_solver: bool = False,
) -> str:
    """
    Compile the given function with persistent cache for AOTI eager mode.

    AOT-compiles ``f`` for the concrete ``args``/``kwargs``, stores the
    resulting shared library under the per-namespace/per-device cache
    directory, and records the input metadata in
    ``<cache>/<op_func_name_with_overload>.json`` so the kernel can be
    matched on later calls.  Returns the kernel library path, or ``""``
    if compilation fails (the failure is logged).
    """
    assert not dynamic, "Only support static shape for now"
    # Inputs are matched positionally: positional args first, then kwarg
    # values in dict order.
    flattened_inputs = list(args) + list(kwargs.values())
    if not all(
        isinstance(
            input,
            (
                supported_scalar_types(),
                torch.Tensor,
                list,
                str,
                torch.dtype,
                torch.device,
                torch.layout,
            ),
        )
        for input in flattened_inputs
    ):
        err_msg = f"Unsupported input types: {flattened_inputs}"
        log.exception(err_msg)
        raise NotImplementedError(err_msg)

    # List inputs are only supported when homogeneous lists of tensors.
    for input in flattened_inputs:
        if isinstance(input, list) and not all(
            isinstance(item, torch.Tensor) for item in input
        ):
            err_msg = f"_impl_with_aoti_compile encounters unsupported input types: {flattened_inputs}"
            log.exception(err_msg)
            raise NotImplementedError(err_msg)

    persistent_cache = aoti_eager_cache_dir(ns, device_type)
    if not persistent_cache.exists():
        persistent_cache.mkdir(parents=True)

    persistent_cache_lib = persistent_cache / "lib"
    if not persistent_cache_lib.exists():
        persistent_cache_lib.mkdir()

    # Redirect Inductor's cache dir so the produced artifacts land inside
    # this op's persistent cache; the patch is undone on exit.
    with mock.patch.dict(
        os.environ,
        {"TORCHINDUCTOR_CACHE_DIR": persistent_cache_lib.absolute().as_posix()},
    ):
        try:
            kernel_lib_path = torch._export.aot_compile(
                f,
                args,
                kwargs,
                dynamic_shapes=dynamic_shapes,
                remove_runtime_assertions=remove_runtime_assertions,
                disable_constraint_solver=disable_constraint_solver,
                # Some operations may have non-Tensor parameters like int, float, bool. These
                # non-Tensor parameters will not be the input of the graph. Therefore, we do
                # need to keep the same signature.
                same_signature=False,
            )

            # Record per-input metadata so the cached kernel can be matched
            # against future call sites.
            kernel_metadata_items = []

            for idx, input in enumerate(flattened_inputs):
                if isinstance(input, torch.Tensor):
                    metadata = extract_tensor_metadata(dynamic, input)
                elif isinstance(input, list):
                    assert all(isinstance(item, torch.Tensor) for item in input)
                    metadata = extract_tensor_list_metadata(dynamic, input)
                elif isinstance(input, supported_scalar_types()):
                    metadata = extract_scalar_metadata(device_type, input)
                elif isinstance(input, str):
                    metadata = extract_string_metadata(input)
                elif isinstance(input, torch.dtype):
                    metadata = extract_dtype_metadata(input)
                elif isinstance(input, torch.device):
                    metadata = extract_device_metadata(input)
                elif isinstance(input, torch.layout):
                    metadata = extract_layout_metadata(input)
                else:
                    raise NotImplementedError(f"Unsupported input type: {type(input)}")

                metadata["arg_order"] = idx
                kernel_metadata_items.append(metadata)

            kernel_meta_info: Dict[str, Any] = {}
            kernel_meta_info["meta_info"] = kernel_metadata_items
            # Store the library path relative to the cache root so the cache
            # directory can be relocated.
            kernel_meta_info["kernel_path"] = (
                Path(kernel_lib_path).relative_to(persistent_cache).as_posix()
            )

            json_data = []
            update_json = True
            op_conf = persistent_cache / f"{op_func_name_with_overload}.json"
            # "w" creates (and truncates) the config when it doesn't exist
            # yet; json.load then fails on the empty file and the except
            # below falls back to an empty list.
            mode = "r" if op_conf.exists() else "w"
            with aoti_eager_op_conf_lock(op_func_name_with_overload):
                with open(op_conf, mode) as op_conf_file:
                    try:
                        json_data = json.load(op_conf_file)
                    except Exception as e:
                        json_data = []

                    assert isinstance(json_data, list)
                    for item in json_data:
                        assert isinstance(item, dict)
                        # Same kernel meta info already exists in the json file
                        if item["meta_info"] == kernel_metadata_items:
                            update_json = False
                            break

                if update_json:
                    json_data.append(kernel_meta_info)
                    with open(op_conf, "w") as op_conf_file:
                        json.dump(json_data, op_conf_file, indent=4)

            return kernel_lib_path
        except Exception as e:
            # Best-effort compile: log the error and signal failure with "".
            err_msg = f"Failed to compile {op_func_name_with_overload}: {e}"
            log.exception(err_msg)
            return ""
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/autoheuristic.cpython-311.pyc ADDED
Binary file (17.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learned_heuristic_controller.cpython-311.pyc ADDED
Binary file (5.86 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learnedheuristic_interface.cpython-311.pyc ADDED
Binary file (6.44 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingA100.cpython-311.pyc ADDED
Binary file (38.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingH100.cpython-311.pyc ADDED
Binary file (40.9 kB). View file