koichi12 commited on
Commit
b2f8f15
·
verified ·
1 Parent(s): d9bcc7f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .venv/lib/python3.11/site-packages/torch/_inductor/__init__.py +179 -0
  3. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/__init__.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/aoti_eager.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/async_compile.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/autotune_process.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/bounds.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comm_analysis.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comms.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/compile_fx.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/config.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/constant_folding.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpp_builder.cpython-311.pyc +0 -0
  14. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpu_vec_isa.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cudagraph_utils.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/debug.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/decomposition.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/dependencies.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/exc.cpython-311.pyc +0 -0
  20. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/extern_node_serializer.cpython-311.pyc +0 -0
  21. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/freezing.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/fx_utils.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/graph.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/hooks.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/index_propagation.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/inductor_prims.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/jagged_lowerings.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/loop_body.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/metrics.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_ir.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_lowerings.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/ops_handler.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/optimize_indexing.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/quantized_lowerings.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/remote_cache.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc +3 -0
  37. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/select_algorithm.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/sizevars.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/subgraph_lowering.cpython-311.pyc +0 -0
  40. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_case.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_operators.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/virtualized.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/wrapper_benchmark.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/torch/_inductor/aoti_eager.py +298 -0
  45. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/autoheuristic.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learned_heuristic_controller.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learnedheuristic_interface.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__init__.py +0 -0
  49. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingA100.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingH100.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -128,3 +128,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_inductor/_
128
  .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_ops.so.9 filter=lfs diff=lfs merge=lfs -text
129
  .venv/lib/python3.11/site-packages/torch/_export/serde/__pycache__/serialize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
130
  .venv/lib/python3.11/site-packages/torch/nn/__pycache__/functional.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
128
  .venv/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_ops.so.9 filter=lfs diff=lfs merge=lfs -text
129
  .venv/lib/python3.11/site-packages/torch/_export/serde/__pycache__/serialize.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
130
  .venv/lib/python3.11/site-packages/torch/nn/__pycache__/functional.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
131
+ .venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
.venv/lib/python3.11/site-packages/torch/_inductor/__init__.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ from typing import Any, Dict, List, Optional, Tuple
3
+
4
+ import torch.fx
5
+ import torch.utils._pytree as pytree
6
+
7
+
8
+ __all__ = ["compile", "list_mode_options", "list_options", "cudagraph_mark_step_begin"]
9
+
10
+
11
def compile(
    gm: torch.fx.GraphModule,
    example_inputs: List[torch.Tensor],
    options: Optional[Dict[str, Any]] = None,
):
    """
    Compile an FX graph with TorchInductor, bypassing TorchDynamo capture.

    Args:
        gm: The FX graph to compile.
        example_inputs: List of tensor inputs.
        options: Optional dict of config options.  See `torch._inductor.config`.

    Returns:
        A callable with the same behavior as ``gm`` but optimized by Inductor.
    """
    # Deferred import: keeps `import torch._inductor` cheap and avoids a
    # circular dependency on compile_fx.
    from .compile_fx import compile_fx

    compiled_fn = compile_fx(gm, example_inputs, config_patches=options)
    return compiled_fn
31
+
32
+
33
def aot_compile(
    gm: torch.fx.GraphModule,
    args: Tuple[Any],
    kwargs: Optional[Dict[str, Any]] = None,
    *,
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Ahead-of-time compile a given FX graph with TorchInductor into a shared library.

    Args:
        gm: The FX graph to compile.
        args: Example arguments
        kwargs: Example keyword arguments
        options: Optional dict of config options. See `torch._inductor.config`.

    Returns:
        Path to the generated shared library
    """
    # Deferred import to avoid circular imports at package load time.
    from .compile_fx import compile_fx_aot, graph_returns_tuple

    assert graph_returns_tuple(gm), (
        "Graph output must be a tuple(). This is so that we can avoid "
        "pytree processing of the outputs. Please change the module to "
        "have tuple outputs."
    )

    # We will serialize the pytree info into the .so as constant strings
    in_spec = None
    out_spec = None
    if isinstance(gm.graph._codegen, torch.fx.graph._PyTreeCodeGen):
        # The graph carries pytree-flattening codegen: swap it out for the
        # plain CodeGen (and recompile) so AOT compilation sees the flat
        # signature, but remember the original in/out specs for embedding.
        codegen = gm.graph._codegen
        gm.graph._codegen = torch.fx.graph.CodeGen()
        gm.recompile()

        if codegen.pytree_info.in_spec is not None:
            in_spec = codegen.pytree_info.in_spec
        if codegen.pytree_info.out_spec is not None:
            out_spec = codegen.pytree_info.out_spec

    else:
        # Graphs produced by export stash their specs as module attributes.
        if hasattr(gm, "_in_spec"):
            in_spec = gm._in_spec
        if hasattr(gm, "_out_spec"):
            out_spec = gm._out_spec

    # Empty string means "no spec available" to the consumer of the .so.
    serialized_in_spec = pytree.treespec_dumps(in_spec) if in_spec is not None else ""
    serialized_out_spec = (
        pytree.treespec_dumps(out_spec) if out_spec is not None else ""
    )

    flat_args_with_path, received_spec = pytree.tree_flatten_with_path(
        (args, kwargs or {})
    )

    # Replace non-tensor (constant) inputs with Nones, since these are not being
    # used anyways by the graph
    flat_example_inputs = [
        x[1] if isinstance(x[1], torch.Tensor) else None for x in flat_args_with_path
    ]

    # Guard against the caller flattening differently from the exported spec.
    if in_spec is not None and received_spec != in_spec:
        raise ValueError(  # noqa: B904
            "Trying to flatten user inputs with exported input tree spec: \n"
            f"{in_spec}\n"
            "but actually got inputs with tree spec of: \n"
            f"{received_spec}"
        )

    # Merge the serialized specs into the config patches; user-supplied
    # options are preserved, spec entries always win.
    options = (
        {
            "aot_inductor.serialized_in_spec": serialized_in_spec,
            "aot_inductor.serialized_out_spec": serialized_out_spec,
        }
        if options is None
        else {
            **options,
            "aot_inductor.serialized_in_spec": serialized_in_spec,
            "aot_inductor.serialized_out_spec": serialized_out_spec,
        }
    )

    return compile_fx_aot(
        gm,
        flat_example_inputs,  # type: ignore[arg-type]
        config_patches=options,
    )
120
+
121
+
122
def list_mode_options(
    mode: Optional[str] = None, dynamic: Optional[bool] = None
) -> Dict[str, Any]:
    r"""Returns a dictionary describing the optimizations that each of the available
    modes passed to `torch.compile()` performs.

    Args:
        mode (str, optional): The mode to return the optimizations for.
            If None, returns optimizations for all modes
        dynamic (bool, optional): Whether dynamic shape is enabled.

    Example::
        >>> torch._inductor.list_mode_options()
    """
    # Each entry maps a compile mode to the config patches it implies.
    modes: Dict[str, Dict[str, bool]] = {
        # no extra optimizations on top of the defaults
        "default": {},
        # cut kernel-launch overhead via CUDA graphs
        "reduce-overhead": {
            "triton.cudagraphs": True,
        },
        # exhaustive kernel autotuning, CUDA graphs disabled
        "max-autotune-no-cudagraphs": {
            "max_autotune": True,
        },
        # exhaustive kernel autotuning plus CUDA graphs
        "max-autotune": {
            "max_autotune": True,
            "triton.cudagraphs": True,
        },
    }
    if not mode:
        return modes  # type: ignore[return-value]
    return modes[mode]
155
+
156
+
157
+ def list_options() -> List[str]:
158
+ r"""Returns a dictionary describing the optimizations and debug configurations
159
+ that are available to `torch.compile()`.
160
+
161
+ The options are documented in `torch._inductor.config`.
162
+
163
+ Example::
164
+
165
+ >>> torch._inductor.list_options()
166
+ """
167
+
168
+ from torch._inductor import config
169
+
170
+ current_config: Dict[str, Any] = config.shallow_copy_dict()
171
+
172
+ return list(current_config.keys())
173
+
174
+
175
def cudagraph_mark_step_begin():
    """Indicates that a new iteration of inference or training is about to begin."""
    # Deferred import: cudagraph_trees is only needed when CUDA graphs are used.
    from .cudagraph_trees import mark_step_begin

    mark_step_begin()
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (6.4 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/aoti_eager.cpython-311.pyc ADDED
Binary file (16.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/async_compile.cpython-311.pyc ADDED
Binary file (15.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/autotune_process.cpython-311.pyc ADDED
Binary file (42.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/bounds.cpython-311.pyc ADDED
Binary file (8.59 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comm_analysis.cpython-311.pyc ADDED
Binary file (8.05 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/comms.cpython-311.pyc ADDED
Binary file (28.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/compile_fx.cpython-311.pyc ADDED
Binary file (77.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/config.cpython-311.pyc ADDED
Binary file (28.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/constant_folding.cpython-311.pyc ADDED
Binary file (18.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpp_builder.cpython-311.pyc ADDED
Binary file (67.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cpu_vec_isa.cpython-311.pyc ADDED
Binary file (16.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/cudagraph_utils.cpython-311.pyc ADDED
Binary file (16.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/debug.cpython-311.pyc ADDED
Binary file (39.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/decomposition.cpython-311.pyc ADDED
Binary file (48.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/dependencies.cpython-311.pyc ADDED
Binary file (45.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/exc.cpython-311.pyc ADDED
Binary file (7.68 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/extern_node_serializer.cpython-311.pyc ADDED
Binary file (1.72 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/freezing.cpython-311.pyc ADDED
Binary file (16.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/fx_utils.cpython-311.pyc ADDED
Binary file (14 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/graph.cpython-311.pyc ADDED
Binary file (97.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/hooks.cpython-311.pyc ADDED
Binary file (1.31 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/index_propagation.cpython-311.pyc ADDED
Binary file (23.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/inductor_prims.cpython-311.pyc ADDED
Binary file (8.69 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/jagged_lowerings.cpython-311.pyc ADDED
Binary file (10.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/loop_body.cpython-311.pyc ADDED
Binary file (37.8 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/metrics.cpython-311.pyc ADDED
Binary file (17.6 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_ir.cpython-311.pyc ADDED
Binary file (62.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/mkldnn_lowerings.cpython-311.pyc ADDED
Binary file (41.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/ops_handler.cpython-311.pyc ADDED
Binary file (58.7 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/optimize_indexing.cpython-311.pyc ADDED
Binary file (4.82 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/quantized_lowerings.cpython-311.pyc ADDED
Binary file (4.29 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/remote_cache.cpython-311.pyc ADDED
Binary file (11.4 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/scheduler.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48fc35b3ba35cd6f3ba02d218d951aa8a531c58ad217a2e94bfb14483e5a78af
3
+ size 216212
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/select_algorithm.cpython-311.pyc ADDED
Binary file (92.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/sizevars.cpython-311.pyc ADDED
Binary file (48.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/subgraph_lowering.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_case.cpython-311.pyc ADDED
Binary file (2.38 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/test_operators.cpython-311.pyc ADDED
Binary file (2.06 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/virtualized.cpython-311.pyc ADDED
Binary file (22 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/__pycache__/wrapper_benchmark.cpython-311.pyc ADDED
Binary file (15.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/aoti_eager.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Dict, List, Optional, Tuple
6
+ from unittest import mock
7
+
8
+ import torch
9
+ import torch._export
10
+ from torch._inductor.utils import is_cpu_device
11
+
12
+ from .runtime.runtime_utils import cache_dir
13
+
14
+
15
+ log = logging.getLogger(__name__)
16
+
17
+
18
+ def aoti_eager_cache_dir(namespace: str, device: str) -> Path:
19
+ return Path(cache_dir()) / "aoti_eager" / namespace / device
20
+
21
+
22
def aoti_eager_op_conf_lock(op_func_name_with_overload: str) -> Any:
    """Return an inter-process file lock guarding the JSON config of one op."""
    from filelock import FileLock

    # Avoid circular import
    from torch._inductor.codecache import get_lock_dir, LOCK_TIMEOUT

    lock_file_name = f"{op_func_name_with_overload}.lock"
    lock_path = os.path.join(get_lock_dir(), lock_file_name)
    return FileLock(lock_path, timeout=LOCK_TIMEOUT)
31
+
32
+
33
def load_aoti_eager_cache(
    ns: str, op_func_name_with_overload: str, device_type: str
) -> List[Optional[Dict[str, Any]]]:
    """Load and normalize the cached AOTI-eager kernel entries for one op.

    Reads ``<cache>/<ns>/<device_type>/<op>.json`` under the config file lock,
    resolves each entry's kernel path to an absolute posix path, and converts
    the string-serialized dtype/layout/memory-format metadata back into the
    corresponding torch objects.  Returns ``[]`` if the config or any kernel
    library is missing, or if anything goes wrong while loading.
    """
    device_kernel_cache = aoti_eager_cache_dir(ns, device_type)
    op_conf = device_kernel_cache / f"{op_func_name_with_overload}.json"
    if not op_conf.exists():
        return []

    try:
        # Lock so we never read the config while a writer is updating it.
        with aoti_eager_op_conf_lock(op_func_name_with_overload):
            with open(op_conf) as f:
                json_data = json.load(f)
                for item in json_data:
                    # Get absolute path for kernel library
                    kernel_lib_abs_path = device_kernel_cache / item["kernel_path"]
                    item["kernel_path"] = kernel_lib_abs_path.as_posix()

                    # Check if the kernel library exists; a single missing
                    # library invalidates the whole cache for this op.
                    if not kernel_lib_abs_path.exists():
                        return []

                    for metadata in item["meta_info"]:
                        if metadata.get("is_dynamic"):
                            raise NotImplementedError(
                                "Only support static shape for now"
                            )
                        if (
                            "device_type" in metadata
                            and metadata["device_type"] == "cpu"
                        ):
                            # CPU entries use device index -1 by convention.
                            metadata["device_index"] = -1
                        # Turn e.g. "torch.float32" back into torch.float32.
                        for dtype_key in ["dtype", "dtype_value"]:
                            if dtype_key in metadata:
                                metadata[dtype_key] = getattr(
                                    torch, metadata[dtype_key].split(".")[-1]
                                )
                        if "layout_value" in metadata:
                            metadata["layout_value"] = getattr(
                                torch, metadata["layout_value"].split(".")[-1]
                            )
                        if "memory_format_value" in metadata:
                            metadata["memory_format_value"] = getattr(
                                torch, metadata["memory_format_value"].split(".")[-1]
                            )

                return json_data
    except Exception as e:
        # Best-effort cache: log the failure and fall back to "no cache".
        err_msg = f"Failed to load aoti eager cache: {e}"
        log.exception(err_msg)
        return []
83
+
84
+
85
def supported_builtin_dtype_torch_dtype() -> Dict[type, torch.dtype]:
    """Map the builtin scalar types AOTI eager supports to their torch dtypes."""
    builtin_to_dtype: Dict[type, torch.dtype] = {
        int: torch.int32,
        float: torch.float,
        bool: torch.bool,
    }
    return builtin_to_dtype
87
+
88
+
89
def supported_scalar_types() -> Tuple[type, ...]:
    """Tuple of builtin scalar types accepted as AOTI eager op inputs."""
    # Iterating the mapping yields its keys, i.e. the supported types.
    return tuple(supported_builtin_dtype_torch_dtype())
92
+
93
+
94
+ def extract_tensor_metadata(dynamic: bool, input: torch.Tensor) -> Dict[str, Any]:
95
+ metadata: Dict[str, Any] = {}
96
+ metadata["is_dynamic"] = dynamic
97
+
98
+ assert isinstance(input, torch.Tensor)
99
+ metadata["device_type"] = f"{input.device.type}"
100
+ if is_cpu_device([input]):
101
+ metadata["device_index"] = -1
102
+ else:
103
+ metadata["device_index"] = input.device.index
104
+ metadata["dtype"] = f"{input.dtype}"
105
+ metadata["sizes"] = list(input.size())
106
+ metadata["strides"] = list(input.stride())
107
+ metadata["requires_grad"] = input.requires_grad
108
+ metadata["dispatch_key_set"] = torch._C._dispatch_keys(input).raw_repr()
109
+ return metadata
110
+
111
+
112
def extract_tensor_list_metadata(
    dynamic: bool,
    input: List[torch.Tensor],
) -> Dict[str, Any]:
    """Describe a list-of-tensors argument: one metadata dict per element,
    wrapped under the ``tensor_list`` key."""
    per_tensor = []
    for tensor in input:
        assert isinstance(tensor, torch.Tensor)
        per_tensor.append(extract_tensor_metadata(dynamic, tensor))

    return {"tensor_list": per_tensor}
124
+
125
+
126
def extract_scalar_metadata(device_type: str, input: Any) -> Dict[str, Any]:
    """Describe a builtin scalar (int/float/bool) argument for the cache."""
    assert isinstance(input, supported_scalar_types())
    builtin_to_dtype = supported_builtin_dtype_torch_dtype()
    return {
        "is_dynamic": False,
        # Scalar tensor: lives on the target device; CPU uses index -1.
        "device_type": device_type,
        "device_index": -1 if device_type == "cpu" else 0,
        "dtype": f"{builtin_to_dtype[type(input)]}",
        "scalar_value": input,
    }
137
+
138
+
139
def extract_string_metadata(input: str) -> Dict[str, Any]:
    """Describe a string argument for the AOTI eager kernel cache."""
    assert isinstance(input, str)
    return {"string_value": input}
144
+
145
+
146
def extract_dtype_metadata(input: torch.dtype) -> Dict[str, Any]:
    """Describe a torch.dtype argument for the AOTI eager kernel cache."""
    assert isinstance(input, torch.dtype)
    return {"dtype_value": f"{input}"}
151
+
152
+
153
def extract_device_metadata(input: torch.device) -> Dict[str, Any]:
    """Describe a torch.device argument for the AOTI eager kernel cache.

    ``device_index_value`` is ``None`` when the device has no explicit index.
    """
    assert isinstance(input, torch.device)
    return {
        "device_type_value": f"{input.type}",
        "device_index_value": input.index,
    }
159
+
160
+
161
def extract_layout_metadata(input: torch.layout) -> Dict[str, Any]:
    """Describe a torch.layout argument for the AOTI eager kernel cache."""
    assert isinstance(input, torch.layout)
    return {"layout_value": f"{input}"}
166
+
167
+
168
def aoti_compile_with_persistent_cache(
    ns: str,
    op_func_name_with_overload: str,
    device_type: str,
    dynamic: bool,
    f: Callable[..., Any],
    args: Tuple[Any],
    kwargs: Dict[str, Any],
    *,
    dynamic_shapes: Optional[Dict[str, Any]] = None,
    options: Optional[Dict[str, Any]] = None,
    remove_runtime_assertions: bool = False,
    disable_constraint_solver: bool = False,
) -> str:
    """
    Compile the given function with persistent cache for AOTI eager mode.

    AOT-compiles ``f`` for the concrete ``args``/``kwargs``, stores the
    resulting shared library under the per-namespace/per-device cache
    directory, and records the input metadata in
    ``<cache>/<op_func_name_with_overload>.json`` so the kernel can be
    matched on later calls.  Returns the kernel library path, or ``""``
    if compilation fails (the failure is logged).
    """
    assert not dynamic, "Only support static shape for now"
    # Inputs are matched positionally: positional args first, then kwarg
    # values in dict order.
    flattened_inputs = list(args) + list(kwargs.values())
    if not all(
        isinstance(
            input,
            (
                supported_scalar_types(),
                torch.Tensor,
                list,
                str,
                torch.dtype,
                torch.device,
                torch.layout,
            ),
        )
        for input in flattened_inputs
    ):
        err_msg = f"Unsupported input types: {flattened_inputs}"
        log.exception(err_msg)
        raise NotImplementedError(err_msg)

    # List inputs are only supported when homogeneous lists of tensors.
    for input in flattened_inputs:
        if isinstance(input, list) and not all(
            isinstance(item, torch.Tensor) for item in input
        ):
            err_msg = f"_impl_with_aoti_compile encounters unsupported input types: {flattened_inputs}"
            log.exception(err_msg)
            raise NotImplementedError(err_msg)

    persistent_cache = aoti_eager_cache_dir(ns, device_type)
    if not persistent_cache.exists():
        persistent_cache.mkdir(parents=True)

    persistent_cache_lib = persistent_cache / "lib"
    if not persistent_cache_lib.exists():
        persistent_cache_lib.mkdir()

    # Redirect Inductor's cache dir so the produced artifacts land inside
    # this op's persistent cache; the patch is undone on exit.
    with mock.patch.dict(
        os.environ,
        {"TORCHINDUCTOR_CACHE_DIR": persistent_cache_lib.absolute().as_posix()},
    ):
        try:
            kernel_lib_path = torch._export.aot_compile(
                f,
                args,
                kwargs,
                dynamic_shapes=dynamic_shapes,
                remove_runtime_assertions=remove_runtime_assertions,
                disable_constraint_solver=disable_constraint_solver,
                # Some operations may have non-Tensor parameters like int, float, bool. These
                # non-Tensor parameters will not be the input of the graph. Therefore, we do
                # need to keep the same signature.
                same_signature=False,
            )

            # Record per-input metadata so the cached kernel can be matched
            # against future call sites.
            kernel_metadata_items = []

            for idx, input in enumerate(flattened_inputs):
                if isinstance(input, torch.Tensor):
                    metadata = extract_tensor_metadata(dynamic, input)
                elif isinstance(input, list):
                    assert all(isinstance(item, torch.Tensor) for item in input)
                    metadata = extract_tensor_list_metadata(dynamic, input)
                elif isinstance(input, supported_scalar_types()):
                    metadata = extract_scalar_metadata(device_type, input)
                elif isinstance(input, str):
                    metadata = extract_string_metadata(input)
                elif isinstance(input, torch.dtype):
                    metadata = extract_dtype_metadata(input)
                elif isinstance(input, torch.device):
                    metadata = extract_device_metadata(input)
                elif isinstance(input, torch.layout):
                    metadata = extract_layout_metadata(input)
                else:
                    raise NotImplementedError(f"Unsupported input type: {type(input)}")

                metadata["arg_order"] = idx
                kernel_metadata_items.append(metadata)

            kernel_meta_info: Dict[str, Any] = {}
            kernel_meta_info["meta_info"] = kernel_metadata_items
            # Store the library path relative to the cache root so the cache
            # directory can be relocated.
            kernel_meta_info["kernel_path"] = (
                Path(kernel_lib_path).relative_to(persistent_cache).as_posix()
            )

            json_data = []
            update_json = True
            op_conf = persistent_cache / f"{op_func_name_with_overload}.json"
            # "w" creates (and truncates) the config when it doesn't exist
            # yet; json.load then fails on the empty file and the except
            # below falls back to an empty list.
            mode = "r" if op_conf.exists() else "w"
            with aoti_eager_op_conf_lock(op_func_name_with_overload):
                with open(op_conf, mode) as op_conf_file:
                    try:
                        json_data = json.load(op_conf_file)
                    except Exception as e:
                        json_data = []

                    assert isinstance(json_data, list)
                    for item in json_data:
                        assert isinstance(item, dict)
                        # Same kernel meta info already exists in the json file
                        if item["meta_info"] == kernel_metadata_items:
                            update_json = False
                            break

                if update_json:
                    json_data.append(kernel_meta_info)
                    with open(op_conf, "w") as op_conf_file:
                        json.dump(json_data, op_conf_file, indent=4)

            return kernel_lib_path
        except Exception as e:
            # Best-effort compile: log the error and signal failure with "".
            err_msg = f"Failed to compile {op_func_name_with_overload}: {e}"
            log.exception(err_msg)
            return ""
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/autoheuristic.cpython-311.pyc ADDED
Binary file (17.5 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learned_heuristic_controller.cpython-311.pyc ADDED
Binary file (5.86 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/__pycache__/learnedheuristic_interface.cpython-311.pyc ADDED
Binary file (6.44 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingA100.cpython-311.pyc ADDED
Binary file (38.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/_inductor/autoheuristic/artifacts/__pycache__/_MMRankingH100.cpython-311.pyc ADDED
Binary file (40.9 kB). View file