cranky-coder08 committed
Commit c1af2fa · verified · 1 parent: f4cade0

Add files using upload-large-folder tool

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +7 -0
  2. phivenv/Lib/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-39.pyc +3 -0
  3. phivenv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-39.pyc +3 -0
  4. phivenv/Lib/site-packages/pkg_resources/__pycache__/__init__.cpython-39.pyc +3 -0
  5. phivenv/Lib/site-packages/pkg_resources/_vendor/__pycache__/pyparsing.cpython-39.pyc +3 -0
  6. phivenv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-39.pyc +3 -0
  7. phivenv/Lib/site-packages/regex/__pycache__/test_regex.cpython-39.pyc +3 -0
  8. phivenv/Lib/site-packages/regex/_regex.cp39-win_amd64.pyd +3 -0
  9. phivenv/Lib/site-packages/torch/include/ATen/core/Formatting.h +25 -0
  10. phivenv/Lib/site-packages/torch/include/ATen/core/Generator.h +191 -0
  11. phivenv/Lib/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h +39 -0
  12. phivenv/Lib/site-packages/torch/include/ATen/core/IListRef.h +631 -0
  13. phivenv/Lib/site-packages/torch/include/ATen/core/IListRef_inl.h +203 -0
  14. phivenv/Lib/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h +111 -0
  15. phivenv/Lib/site-packages/torch/include/ATen/core/List.h +491 -0
  16. phivenv/Lib/site-packages/torch/include/ATen/core/List_inl.h +353 -0
  17. phivenv/Lib/site-packages/torch/include/ATen/core/MT19937RNGEngine.h +194 -0
  18. phivenv/Lib/site-packages/torch/include/ATen/core/NamedTensor.h +143 -0
  19. phivenv/Lib/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h +187 -0
  20. phivenv/Lib/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h +240 -0
  21. phivenv/Lib/site-packages/torch/include/ATen/core/PythonFallbackKernel.h +35 -0
  22. phivenv/Lib/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h +22 -0
  23. phivenv/Lib/site-packages/torch/include/ATen/core/QuantizerBase.h +84 -0
  24. phivenv/Lib/site-packages/torch/include/ATen/core/Range.h +25 -0
  25. phivenv/Lib/site-packages/torch/include/ATen/core/Reduction.h +14 -0
  26. phivenv/Lib/site-packages/torch/include/ATen/core/Scalar.h +1 -0
  27. phivenv/Lib/site-packages/torch/include/ATen/core/ScalarType.h +1 -0
  28. phivenv/Lib/site-packages/torch/include/ATen/core/Tensor.h +98 -0
  29. phivenv/Lib/site-packages/torch/include/ATen/core/TensorAccessor.h +275 -0
  30. phivenv/Lib/site-packages/torch/include/ATen/core/TensorBase.h +1056 -0
  31. phivenv/Lib/site-packages/torch/include/ATen/core/TensorBody.h +0 -0
  32. phivenv/Lib/site-packages/torch/include/ATen/core/TorchDispatchUtils.h +17 -0
  33. phivenv/Lib/site-packages/torch/include/ATen/core/TransformationHelper.h +175 -0
  34. phivenv/Lib/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h +1 -0
  35. phivenv/Lib/site-packages/torch/include/ATen/core/UnsafeFromTH.h +21 -0
  36. phivenv/Lib/site-packages/torch/include/ATen/core/VariableHooksInterface.h +83 -0
  37. phivenv/Lib/site-packages/torch/include/ATen/core/Variadic.h +92 -0
  38. phivenv/Lib/site-packages/torch/include/ATen/core/Vitals.h +94 -0
  39. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/BoxedKernel.h +213 -0
  40. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/BoxedKernel_impl.h +106 -0
  41. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/KernelFunction.h +283 -0
  42. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/KernelFunction_impl.h +320 -0
  43. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/OperatorKernel.h +27 -0
  44. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/WrapFunctionIntoFunctor.h +38 -0
  45. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/WrapFunctionIntoRuntimeFunctor.h +41 -0
  46. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/boxing.h +410 -0
  47. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h +785 -0
  48. phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/test_helpers.h +140 -0
  49. phivenv/Lib/site-packages/torch/include/ATen/core/dispatch/CppSignature.h +67 -0
  50. phivenv/Lib/site-packages/torch/include/ATen/core/dispatch/DispatchKeyExtractor.h +279 -0
.gitattributes CHANGED
@@ -54,3 +54,10 @@ phivenv/Lib/site-packages/pip/_vendor/distlib/t64.exe filter=lfs diff=lfs merge=
  phivenv/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
  phivenv/Lib/site-packages/pip/_vendor/idna/__pycache__/uts46data.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
  phivenv/Lib/site-packages/pip/_vendor/pyparsing/__pycache__/core.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/pkg_resources/_vendor/__pycache__/pyparsing.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/pkg_resources/__pycache__/__init__.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/regex/_regex.cp39-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/regex/__pycache__/test_regex.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
+ phivenv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-39.pyc filter=lfs diff=lfs merge=lfs -text
phivenv/Lib/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-39.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9ae9e3e39533b703fa0fb49576d02a073be55a6fbe3f9d9a38cbeb9ed03e116
+ size 100308
phivenv/Lib/site-packages/pip/_vendor/rich/__pycache__/_emoji_codes.cpython-39.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88b150085f0eb6dcd1c70d632b16ccf923b66e1700800a4756d06b3726b91fcf
+ size 132673
phivenv/Lib/site-packages/pkg_resources/__pycache__/__init__.cpython-39.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92b5449a62f76826fcde2e62b85c16f953a7ccfff8847bc4854a098b5a954dae
+ size 100411
phivenv/Lib/site-packages/pkg_resources/_vendor/__pycache__/pyparsing.cpython-39.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f26434c485b7881d3ef563e57c88a171319a39cfcc3bf348cbe5bfd0d2a9887
+ size 201319
phivenv/Lib/site-packages/regex/__pycache__/_regex_core.cpython-39.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5727abd2cd4972398036f183a2e811e78ffa31946bf89c453917a171a61c12aa
+ size 114484
phivenv/Lib/site-packages/regex/__pycache__/test_regex.cpython-39.pyc ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa833453940a5409176fe65a5ba338e66d9d875a4a905f92c064b1ade0faba66
+ size 140105
phivenv/Lib/site-packages/regex/_regex.cp39-win_amd64.pyd ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:72ee579e80fb57b5b52f1a5a44b4dcbf85567e43442ad80f9da51f21e2f9977f
+ size 723968
phivenv/Lib/site-packages/torch/include/ATen/core/Formatting.h ADDED
@@ -0,0 +1,25 @@
+ #pragma once
+
+ #include <ostream>
+ #include <string>
+
+ #include <c10/core/Scalar.h>
+ #include <ATen/core/Tensor.h>
+
+ namespace c10 {
+ TORCH_API std::ostream& operator<<(std::ostream& out, Backend b);
+ TORCH_API std::ostream& operator<<(std::ostream & out, const Scalar& s);
+ TORCH_API std::string toString(const Scalar& s);
+ }
+ namespace at {
+
+ TORCH_API std::ostream& operator<<(std::ostream& out, const DeprecatedTypeProperties& t);
+ TORCH_API std::ostream& print(
+ std::ostream& stream,
+ const Tensor& tensor,
+ int64_t linesize);
+ inline std::ostream& operator<<(std::ostream & out, const Tensor & t) {
+ return print(out,t,80);
+ }
+ TORCH_API void print(const Tensor & t, int64_t linesize=80);
+ }
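For orientation, the declarations above are ATen's tensor pretty-printing entry points. A minimal usage sketch (not part of this commit; assumes an ordinary libtorch build) might look like:

    #include <torch/torch.h>  // umbrella header; pulls in at::Tensor and the operators above
    #include <iostream>

    int main() {
      at::Tensor t = torch::rand({2, 3});
      std::cout << t << '\n';        // operator<< forwards to print(out, t, /*linesize=*/80)
      at::print(std::cout, t, 120);  // same printer, explicit line width
      return 0;
    }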
phivenv/Lib/site-packages/torch/include/ATen/core/Generator.h ADDED
@@ -0,0 +1,191 @@
+ #pragma once
+
+ #include <cstdint>
+ #include <deque>
+ #include <mutex>
+ #include <utility>
+
+ #include <c10/util/Exception.h>
+ #include <c10/util/intrusive_ptr.h>
+ #include <c10/core/Device.h>
+ #include <c10/core/DispatchKeySet.h>
+
+ // For the record I don't think this is a correct pimpl idiom.
+ // Including Impl header in interface header defeats the purpose
+ // because you can't change Impl private members without forcing
+ // everything that included the interface to rebuild.
+ // Impl should be forward-declared in the interface header instead.
+ #include <c10/core/GeneratorImpl.h>
+
+ /**
+ * Note [Generator]
+ * ~~~~~~~~~~~~~~~~
+ * A Pseudo Random Number Generator (PRNG) is an engine that uses an algorithm to
+ * generate a seemingly random sequence of numbers, that may be later be used in creating
+ * a random distribution. Such an engine almost always maintains a state and requires a
+ * seed to start off the creation of random numbers. Often times, users have
+ * found it beneficial to be able to explicitly create, retain, and destroy
+ * PRNG states and also be able to have control over the seed value.
+ *
+ * A Generator in ATen gives users the ability to read, write and modify a PRNG engine.
+ * For instance, it does so by letting users seed a PRNG engine, fork the state of the
+ * engine, etc.
+ *
+ * By default, there is one generator per device, and a device's generator is
+ * lazily created. A user can use the torch.Generator() api to create their own generator.
+ */
+
+ /**
+ * Note [Acquire lock when using random generators]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Generator and its derived classes are NOT thread-safe. Please note that most of the
+ * places where we have inserted locking for generators are historically based, and we
+ * haven't actually checked that everything is truly thread safe (and it probably isn't).
+ * Please use the public mutex_ when using any methods from these classes, except for the
+ * read-only methods. You can learn about the usage by looking into the unittests
+ * (aten/src/ATen/cpu_generator_test.cpp) and other places where we have used lock_guard.
+ *
+ * TODO: Look into changing the threading semantics of Generators in ATen (e.g., making
+ * them non-thread safe and instead making the generator state splittable, to accommodate
+ * forks into other threads).
+ */
+
+ namespace at {
+
+ class Tensor;
+
+ struct TORCH_API Generator {
+ Generator() = default;
+
+ explicit Generator(c10::intrusive_ptr<c10::GeneratorImpl> gen_impl)
+ : impl_(std::move(gen_impl)) {
+ if (impl_.get() == nullptr) {
+ throw std::runtime_error("GeneratorImpl with nullptr is not supported");
+ }
+ }
+
+ bool operator==(const Generator& rhs) const {
+ return this->impl_ == rhs.impl_;
+ }
+
+ bool operator!=(const Generator& rhs) const {
+ return !((*this) == rhs);
+ }
+
+ bool defined() const {
+ return static_cast<bool>(impl_);
+ }
+
+ c10::GeneratorImpl* unsafeGetGeneratorImpl() const {
+ return impl_.get();
+ }
+
+ c10::GeneratorImpl* unsafeReleaseGeneratorImpl() {
+ return impl_.release();
+ }
+
+ const c10::intrusive_ptr<c10::GeneratorImpl>& getIntrusivePtr() const {
+ return impl_;
+ }
+
+ void set_current_seed(uint64_t seed) { impl_->set_current_seed(seed); }
+ // Sets the offset of Generator state to the desired offset. This is currently
+ // supported for only Philox based Generators, i.e., CUDA and MPS.
+ void set_offset(uint64_t offset) { impl_->set_offset(offset); }
+
+ // Returns the offset of Generator state. This is currently supported for only
+ // Philox based Generators, i.e., CUDA and MPS.
+ uint64_t get_offset() const { return impl_->get_offset(); }
+
+ uint64_t current_seed() const { return impl_->current_seed(); }
+
+ uint64_t seed() { return impl_->seed(); }
+
+ // Implementation not inlined to prevent cycle reference between
+ // `ATen/core/Generator.h` and `ATen/core/Tensor.h`
+ void set_state(const at::Tensor& new_state);
+
+ at::Tensor get_state() const;
+
+ void graphsafe_set_state(const Generator& new_state);
+
+ Generator graphsafe_get_state() const;
+
+ std::mutex& mutex() {
+ return impl_->mutex_;
+ }
+
+ DispatchKeySet key_set() const {
+ return impl_->key_set();
+ }
+
+ Device device() const { return impl_->device(); }
+
+ inline void set_pyobj(PyObject* pyobj) const noexcept {
+ impl_->set_pyobj(pyobj);
+ }
+
+ inline PyObject* pyobj() const noexcept {
+ return impl_->pyobj();
+ }
+
+ template<typename T>
+ T* get() const { return static_cast<T*>(impl_.get()); }
+
+ Generator clone() const {
+ return Generator(impl_->clone());
+ }
+
+ private:
+ c10::intrusive_ptr<c10::GeneratorImpl> impl_;
+ };
+
+ template<class Impl, class... Args>
+ Generator make_generator(Args&&... args) {
+ return Generator(c10::make_intrusive<Impl>(std::forward<Args>(args)...));
+ }
+
+ /**
+ * Utility function to static cast input Generator* to
+ * the backend generator type (CPU/CUDAGeneratorImpl etc.)
+ */
+ template <typename T>
+ inline T * check_generator(std::optional<Generator> gen) {
+ TORCH_CHECK(gen.has_value(), "Expected Generator but received nullopt");
+ TORCH_CHECK(gen->defined(), "Generator with undefined implementation is not allowed");
+ TORCH_CHECK(T::device_type() == gen->device().type(), "Expected a '", T::device_type(), "' device type for generator but found '", gen->device().type(), "'");
+ return gen->get<T>();
+ }
+
+ /**
+ * Utility function used in tensor implementations, which
+ * supplies the default generator to tensors, if an input generator
+ * is not supplied. The input Generator* is also static casted to
+ * the backend generator type (CPU/CUDAGeneratorImpl etc.)
+ */
+ template <typename T>
+ inline T* get_generator_or_default(const std::optional<Generator>& gen, const Generator& default_gen) {
+ return gen.has_value() && gen->defined() ? check_generator<T>(gen) : check_generator<T>(default_gen);
+ }
+
+ namespace detail {
+
+ /**
+ * Helper function for checking the validity of new random generator
+ * state. Right now following conditions are checked:
+ *
+ * - The new state tensor must be a torch.ByteTensor
+ * - Data of the new state tensor must be contiguous
+ */
+ inline void check_rng_state(const c10::TensorImpl& new_state) {
+ TORCH_CHECK_TYPE(
+ new_state.layout() == kStrided && new_state.device().type() == kCPU && new_state.dtype() == kByte,
+ "RNG state must be a torch.ByteTensor"
+ );
+
+ TORCH_CHECK(new_state.is_contiguous(), "RNG state must be contiguous");
+ }
+
+ } // namespace detail
+
+ } // namespace at
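Note [Acquire lock when using random generators] above asks callers to hold the generator's public mutex around any non-read-only call. A minimal sketch of that discipline (not part of this commit; at::CPUGeneratorImpl and its header location are assumptions based on current ATen):

    #include <ATen/core/Generator.h>
    #include <ATen/CPUGeneratorImpl.h>  // assumed location of at::CPUGeneratorImpl
    #include <mutex>

    void seed_example() {
      // make_generator<Impl>(...) wraps a concrete GeneratorImpl in the type-erased handle above.
      at::Generator gen = at::make_generator<at::CPUGeneratorImpl>(/*seed_in=*/42);

      // Generators are not thread-safe; hold the public mutex for mutating calls.
      std::lock_guard<std::mutex> lock(gen.mutex());
      gen.set_current_seed(1337);
    }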
phivenv/Lib/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h ADDED
@@ -0,0 +1,39 @@
+ #pragma once
+
+ #include <ATen/core/Generator.h>
+ #include <c10/util/intrusive_ptr.h>
+
+ namespace at {
+
+ using GeneratorFuncType = std::function<at::Generator(c10::DeviceIndex)>;
+
+ TORCH_API std::optional<GeneratorFuncType>& GetGeneratorPrivate();
+
+ class TORCH_API _GeneratorRegister {
+ public:
+ explicit _GeneratorRegister(const GeneratorFuncType& func);
+ };
+
+ TORCH_API at::Generator GetGeneratorForPrivateuse1(
+ c10::DeviceIndex device_index);
+
+ /**
+ * This is used to register Generator to PyTorch for `privateuse1` key.
+ *
+ * Usage: REGISTER_GENERATOR_PRIVATEUSE1(MakeGeneratorForPrivateuse1)
+ *
+ * class CustomGeneratorImpl : public c10::GeneratorImpl {
+ * CustomGeneratorImpl(DeviceIndex device_index = -1);
+ * explicit ~CustomGeneratorImpl() override = default;
+ * ...
+ * };
+ *
+ * at::Generator MakeGeneratorForPrivateuse1(c10::DeviceIndex id) {
+ * return at::make_generator<CustomGeneratorImpl>(id);
+ * }
+ */
+
+ #define REGISTER_GENERATOR_PRIVATEUSE1(GeneratorPrivate) \
+ static auto temp##GeneratorPrivate = at::_GeneratorRegister(GeneratorPrivate);
+
+ } // namespace at
phivenv/Lib/site-packages/torch/include/ATen/core/IListRef.h ADDED
@@ -0,0 +1,631 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/ivalue_to.h>
4
+ #include <c10/util/ArrayRef.h>
5
+ #include <c10/util/Exception.h>
6
+
7
+ #include <functional>
8
+ #include <initializer_list>
9
+ #include <iterator>
10
+ #include <type_traits>
11
+
12
+ /*
13
+ * [Note: IListRef]
14
+ * Wrapper around different API containers (e.g. boxed and unboxed).
15
+ *
16
+ * What is it?
17
+ * ===========
18
+ * It is a tagged union of both boxed and unboxed API containers.
19
+ * Working implementations:
20
+ *
21
+ * - `IListRef<at::Tensor>`
22
+ * - `IListRef<at::OptionalTensorRef>`
23
+ *
24
+ * Note that `IListRef` is a view type. Meaning that it won't own the
25
+ * tensors it holds. It's intended to be used only as argument parameters.
26
+ * Specifically, where these 2 worlds overlap.
27
+ *
28
+ * What is this for?
29
+ * =================
30
+ * Historically, PyTorch has maintained 2 different APIs: the unboxed
31
+ * (called from C++ API and Python eager mode) and boxed APIs (called
32
+ * from the TorchScript JIT, mobile interpreter, and boxed fallbacks).
33
+ *
34
+ * Calling unboxed kernels from the boxed "world" and vice-versa may
35
+ * result in non-negligible overhead. Lists are one of those types:
36
+ *
37
+ * - Boxed world: `c10::List`
38
+ * - Unboxed world: `c10::ArrayRef`
39
+ *
40
+ * In this context, `c10::IListRef` solves this problem by wrapping those
41
+ * 2 container types, so that we don't need to convert from one to
42
+ * the other.
43
+ *
44
+ * (see https://github.com/pytorch/pytorch/issues/66328)
45
+ *
46
+ * What does it do?
47
+ * ================
48
+ * This container wraps around the different tagged containers
49
+ * (currently, only boxed and unboxed), without incurring in extra
50
+ * overhead for converting from one to another. It does so while
51
+ * exposing usual container methods, which dispatch to corresponding
52
+ * implementations.
53
+ *
54
+ * While it works with different container types, it introduces
55
+ * overhead for repeatedly calling member functions (since those will
56
+ * get dispatched, again). Therefore, you should only use it to iterate
57
+ * through the list up to one time. If you need to do more complex things,
58
+ * call `materialize()` first.
59
+ *
60
+ * Adding support for a new Tag
61
+ * ============================
62
+ * Suppose we want to add a new tag: `Chest`. Here are the steps
63
+ * we would have to go through:
64
+ *
65
+ * 1. Add a line for it in the macro `TORCH_ILISTREF_FORALL_TAGS`.
66
+ *
67
+ * #define TORCH_ILISTREF_FORALL_TAGS(_, ...) \
68
+ * ...
69
+ * _(Chest, ##__VA_ARGS__)
70
+ *
71
+ * 2. Add type aliases, union members, and constructors.
72
+ *
73
+ * template <typename T>
74
+ * class IListRef {
75
+ * ...
76
+ * using chest_type =
77
+ * typename detail::IListRefTagImpl<T, IListRefTag::Chest>::list_type;
78
+ * ...
79
+ * IListRef(...) : tag_(IListRefTag::Chest) {
80
+ * ...
81
+ * }
82
+ * ...
83
+ * union Payload {
84
+ * ...
85
+ * chest_type chest;
86
+ * ...
87
+ * };
88
+ * ...
89
+ * };
90
+ *
91
+ * 3. Add a default implementation for it (in 'IListRef_inl.h'). It's
92
+ * preferable to make the default implementation work for `T = Tensor`
93
+ * (both `Unboxed` and `Boxed` do it).
94
+ *
95
+ * template <typename T, typename ListElemT>
96
+ * class IListRefTagImplBase<IListRefTag::Chest, T, ListElemT> {
97
+ * public:
98
+ * using elem_type = ListElemT;
99
+ * using list_type = ChestContainer<elem_type>;
100
+ *
101
+ * static const list_type& unwrap(const IListRef<T>& ilist) { ... }
102
+ *
103
+ * static typename list_type::const_iterator& unwrap(
104
+ * IListRefIterator<T>& it) { ... }
105
+ *
106
+ * static const typename list_type::const_iterator& unwrap(
107
+ * const IListRefIterator<T>& it) { ... }
108
+ *
109
+ * static IListRefConstRef<T> iterator_get(
110
+ * const typename list_type::const_iterator& it) { ... }
111
+ * }
112
+ *
113
+ * 4. Add an specialization for each of the already supported types.
114
+ * Finally, for consistency, add them to the tracking list.
115
+ * (see [Note: IListRefTagImpl Specializations])
116
+ *
117
+ * template <>
118
+ * class IListRefTagImpl<IListRefTag::Chest, at::Tensor>
119
+ * : public IListRefTagImplBase<IListRefTag::Chest, at::Tensor> {};
120
+ *
121
+ * Adding support for a new Type
122
+ * =============================
123
+ * Suppose we want to add support for a new type: `Matrix`.
124
+ * Here are the steps we would have to go through:
125
+ *
126
+ * 1. Add an specialization for each of the existing tags.
127
+ * For consistency, add them to the tracking list.
128
+ * (see [Note: IListRefTagImpl Specializations])
129
+ *
130
+ * template <>
131
+ * class IListRefTagImpl<IListRefTag::Unboxed, Matrix>
132
+ * : public IListRefTagImplBase<IListRefTag::Unboxed, Matrix> {};
133
+ *
134
+ * template <>
135
+ * class IListRefTagImpl<Matrix, IListRefTag::Boxed>
136
+ * : public IListRefTagImplBase<IListRefTag::Boxed, Matrix> {};
137
+ *
138
+ * Common Problems
139
+ * ===============
140
+ * 1. One of `IListRef(Iterator)` methods are failing to compile.
141
+ *
142
+ * That may be happening because the container type you added
143
+ * is not compatible with the code written for that method. If
144
+ * that's true, then you might have to transform that code into
145
+ * a static method call (see `List::operator[]` method).
146
+ *
147
+ * 2. Can't make `IListRefIterator<T>::operator*` return a const-reference.
148
+ *
149
+ * First, keep in mind that we assume that boxed containers will
150
+ * have to deal with `IValue` (e.g. `c10::List`). In this context,
151
+ * what may be happening is that `IValue` doesn't store internally
152
+ * your type `T`. Instead, it constructs a type new `T` everytime
153
+ * you try to get `T` for it (see `IListRef<at::OptinalTensorRef>`).
154
+ */
155
+
156
+ namespace c10 {
157
+ template <typename T>
158
+ class IListRef;
159
+
160
+ /*
161
+ * Applies arbitrary macros to each `IListRefTag`.
162
+ */
163
+ #define TORCH_ILISTREF_FORALL_TAGS(_, ...) \
164
+ _(Unboxed, ##__VA_ARGS__) \
165
+ _(Boxed, ##__VA_ARGS__) \
166
+ _(Materialized, ##__VA_ARGS__)
167
+
168
+ /*
169
+ * Defines a "switch-case" for `TAG`. Inside, it executes `BODY`,
170
+ * while bringing to scope:
171
+ *
172
+ * - `ImplT`: the implementation class for `TAG`
173
+ * - `this_`: the result of unwrapping `this`
174
+ */
175
+ #define TORCH_ILISTREF_UNWRAP_CASE(TAG, BODY) \
176
+ case c10::IListRefTag::TAG: { \
177
+ using ImplT = c10::detail::IListRefTagImpl<IListRefTag::TAG, T>; \
178
+ auto& this_ = ImplT::unwrap(*this); \
179
+ BODY \
180
+ } break;
181
+
182
+ /*
183
+ * Dispatches the unwrap call, depending on `TAG`, followed by
184
+ * the execution of `BODY`. It aborts if `TAG` is not a `IListRefTag`.
185
+ *
186
+ * This macro is useful because it allows us to handle different
187
+ * types (that correspond to different tags) to be implemented
188
+ * only once. We can do it even when the implementation of the
189
+ * different tags aren't syntatically the same, by dispatching
190
+ * it to a function (e.g. `ImplT::<dispatch-function>(this_)`).
191
+ */
192
+ #define TORCH_ILISTREF_UNWRAP(TAG, BODY) \
193
+ switch (TAG) { \
194
+ TORCH_ILISTREF_FORALL_TAGS(TORCH_ILISTREF_UNWRAP_CASE, BODY) \
195
+ break; \
196
+ default: \
197
+ TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); \
198
+ }
199
+
200
+ enum class IListRefTag {
201
+ #define DEFINE_TAG(tag, ...) tag,
202
+ TORCH_ILISTREF_FORALL_TAGS(DEFINE_TAG)
203
+ #undef DEFINE_TAG
204
+ None
205
+ };
206
+
207
+ namespace detail {
208
+ /*
209
+ * Type alias that specifies whether we return a reference or a copy of `T`.
210
+ *
211
+ * What is this for?
212
+ * =================
213
+ * Since values in the boxed world are represented by an `IValue`, we also
214
+ * depend on whether it can be converted to a const-reference (`Tensor`) or
215
+ * has to create a new copy of `T` (`OptionalTensorRef`).
216
+ */
217
+ template <typename T>
218
+ using IListRefConstRef = typename ivalue_to_const_ref_overload_return<T>::type;
219
+
220
+ /*
221
+ * Interface that implements key functions for each `IListRefTag` type.
222
+ *
223
+ * What is this for?
224
+ * =================
225
+ * Given an `IListRef(Iterator)<T>`, some methods have to be implemented
226
+ * differently for each `TAG`. Therefore, the methods inside this class
227
+ * are used as dispatch targets for the different `IListRefTag` values.
228
+ *
229
+ * You should create an specialization of this class for each possible
230
+ * combination of `IListRefTag` type (except `None`) and element types
231
+ * (e.g. `Tensor`).
232
+ *
233
+ * What does it do?
234
+ * ================
235
+ * 1. defines static methods to be used as dispatch targets by both
236
+ * `IListRef<T>` and `IListRefIterator<T>` (see the implementation of
237
+ * `IListRefTagImplBase`).
238
+ *
239
+ * 2. defines the `elem_type` and `list_type` aliases that will be
240
+ * used in the definition of `IListRef<T>`. In general, we should do
241
+ * so by inheriting from `IListRefTagImplBase<TAG, T, ListElemT>`.
242
+ *
243
+ * [Note: IListRefTagImpl Specialization]
244
+ * ======================================
245
+ * For `IListRef(Iterator)<at::Tensor>`:
246
+ * - <IListRefTag::Unboxed, at::Tensor>
247
+ * - <IListRefTag::Boxed, at::Tensor>
248
+ * - <IListRefTag::Materialized, at::Tensor>
249
+ *
250
+ * For `IListRef(Iterator)<at::OptionalTensorRef>`:
251
+ * - <IListRefTag::Unboxed, at::OptionalTensorRef>
252
+ * - <IListRefTag::Boxed, at::OptionalTensorRef>
253
+ * - <IListRefTag::Materialized, at::OptionalTensorRef>
254
+ */
255
+ template <IListRefTag TAG, typename T>
256
+ class IListRefTagImpl {};
257
+
258
+ /*
259
+ * Base implementation of `IListRefTagImpl<TAG, T>` methods.
260
+ *
261
+ * What is this for?
262
+ * =================
263
+ * This should make adding specializations for new types easier. For
264
+ * example, one should be able to add a new type just by making its
265
+ * `IListRefTagImpl` specialization inherit from `IListRefTagImplBase`.
266
+ *
267
+ * You should create a partial specialization for this class only if
268
+ * you introduce a new `IListRefTag`. The idea being that there is one
269
+ * default implementation for each possible value of `IListRefTag`.
270
+ *
271
+ * What does it do?
272
+ * ================
273
+ * 1. defines `elem_type` as an alias to `ListElemT`.
274
+ *
275
+ * 1. defines `list_type` as an alias to the default container type
276
+ * that will hold a collection of `elem_type`. The idea being that
277
+ * all types tagged as `TAG` will have `list_type` as its container,
278
+ * with different `elem_type`.
279
+ *
280
+ * 3. defines the default implementation for each of the methods that
281
+ * are supposed to be defined on `IListRefTagImpl` specializations.
282
+ *
283
+ * 4. inheriting from `IListRefTagImplBase<TAG, T, ListElemT>` also means
284
+ * that the payload of the type `IListRef<T>` will be of type `list_type`
285
+ * when it is tagged as `TAG`.
286
+ */
287
+ template <IListRefTag TAG, typename T, typename ListElemT = T>
288
+ class IListRefTagImplBase {};
289
+
290
+ /*
291
+ * Materialized container for `IListRef<T>`.
292
+ *
293
+ * What is this for?
294
+ * =================
295
+ * Container that groups `T` references together. This exchanges the
296
+ * overhead of every method call from `IListRef<T>` for a dynamic allocation.
297
+ *
298
+ * You should use this container instead of `IListRef<T>` if:
299
+ *
300
+ * - You are going to iterate the list more than once
301
+ * - You need to repeatedly access arbitrary elements (using `operator[]`)
302
+ * What does it do?
303
+
304
+ * ================
305
+ * Removes the reference (&) from the type, and wraps it into a
306
+ * `std::reference_wrapper`. If `IListRefConstRef<T>` is not a
307
+ * reference type, then it's left unchanged.
308
+ */
309
+ template <typename T>
310
+ using _MaterializedIListRefElem = std::conditional_t<
311
+ std::is_reference_v<T>,
312
+ typename std::reference_wrapper<std::remove_reference_t<T>>,
313
+ T>;
314
+
315
+ template <typename T>
316
+ using MaterializedIListRefElem = _MaterializedIListRefElem<IListRefConstRef<T>>;
317
+
318
+ template <typename T>
319
+ using MaterializedIListRef = std::vector<MaterializedIListRefElem<T>>;
320
+
321
+ } // namespace detail
322
+
323
+ /*
324
+ * Iterator for `IListRef<T>`.
325
+ *
326
+ * What is it?
327
+ * ===========
328
+ * Currently, a `std::bidirectional_iterator` that wraps the iterator
329
+ * types defined for each of the `IListRefTag`.
330
+ *
331
+ * One should be able to use it, as if it were the unwrapped
332
+ * iterators themselves.
333
+
334
+ * What does it do?
335
+ * ================
336
+ * Similarly to `IListRef<T>`, this is a wrapper class. Specifically, it
337
+ * wraps each container's `const_iterator` type alias. So, for example,
338
+ * given that the container for `IListRefTag::Boxed` is `c10::List`, this
339
+ * iterator will wrap a `c10::List::const_iterator`.
340
+ *
341
+ * [Note: MSVC Iterator Debug]
342
+ * ===========================
343
+ * MSVC `vector<T>::iterator` implementation (used in the boxed variant)
344
+ * makes it so this union's destructor, copy-constructor (assignment), and
345
+ * move-constructor (assignment) are implicitly deleted.
346
+ *
347
+ * Therefore, we need to explicitly define them as needed. Follows a list
348
+ * of places where these are needed and their reason:
349
+ *
350
+ * - `Payload` destructor:
351
+ * it is deleted only if the macro `_ITERATOR_DEBUG_LEVEL` is set to 2.
352
+ *
353
+ * - `IListRefIterator` destructor:
354
+ * same as above. However, we need to explicitly call the variant
355
+ * destructor explicitly.
356
+ *
357
+ * - `IListRefIterator` copy-constructor:
358
+ * it is deleted only if the macro `_ITERATOR_DEBUG_LEVEL` is different
359
+ * than 0.
360
+ */
361
+ template <typename T>
362
+ class IListRefIterator {
363
+ private:
364
+ #define DEFINE_FRIEND_CLASS(TAG, ...) \
365
+ friend class detail::IListRefTagImpl<IListRefTag::TAG, T>; \
366
+ friend class detail::IListRefTagImplBase< \
367
+ IListRefTag::TAG, \
368
+ T, \
369
+ typename detail::IListRefTagImpl<IListRefTag::TAG, T>::elem_type>;
370
+ TORCH_ILISTREF_FORALL_TAGS(DEFINE_FRIEND_CLASS)
371
+ #undef DEFINE_FRIEND_CLASS
372
+
373
+ public:
374
+ // C++17 friendly std::iterator implementation
375
+ using iterator_category = std::bidirectional_iterator_tag;
376
+ using value_type = T;
377
+ using difference_type = std::ptrdiff_t;
378
+ using pointer = T*;
379
+ using reference = T&;
380
+
381
+ using unboxed_iterator_type = typename detail::
382
+ IListRefTagImpl<IListRefTag::Unboxed, T>::list_type::const_iterator;
383
+ using boxed_iterator_type = typename detail::
384
+ IListRefTagImpl<IListRefTag::Boxed, T>::list_type::const_iterator;
385
+ using materialized_iterator_type =
386
+ typename detail::MaterializedIListRef<T>::const_iterator;
387
+
388
+ IListRefIterator() : tag_(IListRefTag::None) {}
389
+
390
+ #if defined(_MSC_VER) && _ITERATOR_DEBUG_LEVEL != 0
391
+ // See [Note: MSVC Iterator Debug]
392
+ IListRefIterator(const IListRefIterator& iterator)
393
+ : tag_(iterator.tag_) {
394
+ switch (tag_) {
395
+ case IListRefTag::Boxed:
396
+ payload_.boxed_iterator = iterator.payload_.boxed_iterator;
397
+ break;
398
+ case IListRefTag::Unboxed:
399
+ payload_.unboxed_iterator = iterator.payload_.unboxed_iterator;
400
+ break;
401
+ case IListRefTag::Materialized:
402
+ payload_.materialized_iterator = iterator.payload_.materialized_iterator;
403
+ break;
404
+ default:
405
+ TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag.");
406
+ }
407
+ }
408
+ #endif
409
+
410
+ #if defined(_MSC_VER) && _ITERATOR_DEBUG_LEVEL == 2
411
+ // See [Note: MSVC Iterator Debug]
412
+ ~IListRefIterator() noexcept(false) {
413
+ switch (tag_) {
414
+ case IListRefTag::Boxed:
415
+ payload_.boxed_iterator.~boxed_iterator_type();
416
+ break;
417
+ case IListRefTag::Unboxed:
418
+ payload_.unboxed_iterator.~unboxed_iterator_type();
419
+ break;
420
+ case IListRefTag::Materialized:
421
+ payload_.materialized_iterator.~materialized_iterator_type();
422
+ break;
423
+ default:
424
+ TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag.");
425
+ }
426
+ }
427
+ #endif
428
+
429
+ IListRefIterator(boxed_iterator_type boxed) : tag_(IListRefTag::Boxed) {
430
+ payload_.boxed_iterator = boxed;
431
+ }
432
+
433
+ IListRefIterator(unboxed_iterator_type unboxed) : tag_(IListRefTag::Unboxed) {
434
+ payload_.unboxed_iterator = unboxed;
435
+ }
436
+
437
+ IListRefIterator(materialized_iterator_type materialized) : tag_(IListRefTag::Materialized) {
438
+ payload_.materialized_iterator = materialized;
439
+ }
440
+
441
+ detail::IListRefConstRef<T> operator*() const {
442
+ TORCH_ILISTREF_UNWRAP(tag_, { return ImplT::iterator_get(this_); });
443
+ }
444
+
445
+ IListRefIterator& operator++() {
446
+ TORCH_ILISTREF_UNWRAP(tag_, { ++this_; });
447
+ return *this;
448
+ }
449
+
450
+ IListRefIterator operator++(int) {
451
+ auto old = *this;
452
+ TORCH_ILISTREF_UNWRAP(tag_, { ++this_; });
453
+ return old;
454
+ }
455
+
456
+ IListRefIterator& operator--() {
457
+ TORCH_ILISTREF_UNWRAP(tag_, { --this_; });
458
+ return *this;
459
+ }
460
+
461
+ IListRefIterator operator--(int) {
462
+ auto old = *this;
463
+ TORCH_ILISTREF_UNWRAP(tag_, { --this_; });
464
+ return old;
465
+ }
466
+
467
+ bool operator==(const IListRefIterator& rhs) const {
468
+ if (tag_ != rhs.tag_) {
469
+ return false;
470
+ }
471
+ TORCH_ILISTREF_UNWRAP(tag_, {
472
+ auto& rhs_it = ImplT::unwrap(rhs);
473
+ return this_ == rhs_it;
474
+ });
475
+ }
476
+
477
+ bool operator!=(const IListRefIterator& rhs) const {
478
+ return !(*this == rhs);
479
+ }
480
+
481
+ private:
482
+ union Payload {
483
+ boxed_iterator_type boxed_iterator;
484
+ unboxed_iterator_type unboxed_iterator;
485
+ materialized_iterator_type materialized_iterator;
486
+ void* _init_ptr;
487
+ Payload() : _init_ptr(nullptr) {}
488
+ #if defined(_MSC_VER)
489
+ // See [Note: MSVC Iterator Debug]
490
+ ~Payload() {}
491
+ #endif
492
+ };
493
+
494
+ Payload payload_;
495
+ IListRefTag tag_;
496
+ };
497
+
498
+ /*
499
+ * See [Note: IListRef]
500
+ */
501
+ template <typename T>
502
+ class IListRef {
503
+ private:
504
+ #define DEFINE_FRIEND_CLASS(TAG, ...) \
505
+ friend class detail::IListRefTagImpl<IListRefTag::TAG, T>; \
506
+ friend class detail::IListRefTagImplBase< \
507
+ IListRefTag::TAG, \
508
+ T, \
509
+ typename detail::IListRefTagImpl<IListRefTag::TAG, T>::elem_type>;
510
+ TORCH_ILISTREF_FORALL_TAGS(DEFINE_FRIEND_CLASS)
511
+ #undef DEFINE_FRIEND_CLASS
512
+
513
+ public:
514
+ using unboxed_type =
515
+ typename detail::IListRefTagImpl<IListRefTag::Unboxed, T>::list_type;
516
+ using boxed_type =
517
+ typename detail::IListRefTagImpl<IListRefTag::Boxed, T>::list_type;
518
+ using materialized_type =
519
+ typename detail::MaterializedIListRef<T>;
520
+
521
+ using iterator = IListRefIterator<T>;
522
+ using const_iterator = IListRefIterator<T>;
523
+ using reverse_iterator = std::reverse_iterator<iterator>;
524
+ using value_type = typename iterator::value_type;
525
+
526
+ IListRef() : tag_(IListRefTag::None) {}
527
+
528
+ IListRef(const boxed_type& boxed) : tag_(IListRefTag::Boxed) {
529
+ payload_.boxed = &boxed;
530
+ }
531
+
532
+ IListRef(const unboxed_type& unboxed) : tag_(IListRefTag::Unboxed) {
533
+ payload_.unboxed = unboxed;
534
+ }
535
+
536
+ IListRef(const std::initializer_list<T>& list) : tag_(IListRefTag::Unboxed) {
537
+ payload_.unboxed = at::ArrayRef<T>(list);
538
+ }
539
+
540
+ template <
541
+ typename... UnboxedConstructorArgs,
542
+ typename = std::enable_if_t<
543
+ std::is_constructible_v<unboxed_type, UnboxedConstructorArgs...>>>
544
+ IListRef(UnboxedConstructorArgs&&... args) : tag_(IListRefTag::Unboxed) {
545
+ payload_.unboxed = unboxed_type(std::forward<UnboxedConstructorArgs>(args)...);
546
+ }
547
+
548
+ IListRef(const materialized_type& materialized) : tag_(IListRefTag::Materialized) {
549
+ payload_.materialized = &materialized;
550
+ }
551
+
552
+ size_t size() const {
553
+ TORCH_ILISTREF_UNWRAP(tag_, { return this_.size(); });
554
+ }
555
+
556
+ bool empty() const {
557
+ return size() == 0;
558
+ }
559
+
560
+ iterator begin() const {
561
+ TORCH_ILISTREF_UNWRAP(tag_, { return this_.begin(); });
562
+ }
563
+
564
+ iterator end() const {
565
+ TORCH_ILISTREF_UNWRAP(tag_, { return this_.end(); });
566
+ }
567
+
568
+ detail::IListRefConstRef<T> front() const {
569
+ TORCH_ILISTREF_UNWRAP(tag_, { return ImplT::front(this_); });
570
+ }
571
+
572
+ /*
573
+ * Materializes the `IListRef` into a `std::vector`.
574
+ *
575
+ * This should be used when one wishes to either:
576
+ *
577
+ * - iterate over the list more than once: each `IListRefIterator`
578
+ * member function call has to go through a switch, introducing
579
+ * non-negligible overhead
580
+ *
581
+ * - randomly access an arbitrary element using `operator[]`:
582
+ * same reason as above
583
+ */
584
+ detail::MaterializedIListRef<T> materialize() const {
585
+ if (isMaterialized()) {
586
+ return toMaterialized();
587
+ }
588
+
589
+ detail::MaterializedIListRef<T> materialized;
590
+ materialized.reserve(size());
591
+ for (const auto& t : *this) {
592
+ materialized.emplace_back(t);
593
+ }
594
+ return materialized;
595
+ }
596
+
597
+ #define DEFINE_CHECK(TAG, ...) \
598
+ bool is##TAG() const { \
599
+ return tag_ == IListRefTag::TAG; \
600
+ }
601
+ TORCH_ILISTREF_FORALL_TAGS(DEFINE_CHECK)
602
+ #undef DEFINE_CHECK
603
+
604
+ bool isNone() const {
605
+ return tag_ == IListRefTag::None;
606
+ }
607
+
608
+ #define DEFINE_CASTING(TAG, ...) \
609
+ const typename detail::IListRefTagImpl<IListRefTag::TAG, T>::list_type& \
610
+ to##TAG() const { \
611
+ TORCH_INTERNAL_ASSERT(is##TAG()); \
612
+ return detail::IListRefTagImpl<IListRefTag::TAG, T>::unwrap(*this); \
613
+ }
614
+ TORCH_ILISTREF_FORALL_TAGS(DEFINE_CASTING)
615
+ #undef DEFINE_CASTING
616
+
617
+ private:
618
+ union Payload {
619
+ const boxed_type* boxed;
620
+ unboxed_type unboxed;
621
+ const materialized_type* materialized;
622
+ Payload() : boxed(nullptr) {}
623
+ };
624
+
625
+ Payload payload_;
626
+ IListRefTag tag_;
627
+ };
628
+
629
+ } // namespace c10
630
+
631
+ #include <ATen/core/IListRef_inl.h>
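As the notes in this header explain, `IListRef` is a non-owning view over either a boxed `c10::List` or an unboxed `ArrayRef`, meant to be traversed at most once unless `materialize()` is called first. A minimal sketch of a function taking `at::ITensorListRef` (not part of this commit; the helper name is illustrative):

    #include <ATen/ATen.h>
    #include <ATen/core/IListRef.h>
    #include <ATen/core/List.h>
    #include <vector>

    // Accepts tensors from both the boxed and unboxed "worlds" without converting containers.
    int64_t total_numel(at::ITensorListRef tensors) {
      // Every IListRef/iterator call dispatches on the tag, so for repeated passes
      // the header recommends materializing into a plain std::vector first.
      auto materialized = tensors.materialize();
      int64_t total = 0;
      for (const at::Tensor& t : materialized) {
        total += t.numel();
      }
      return total;
    }

    void caller() {
      std::vector<at::Tensor> unboxed = {at::ones({2}), at::zeros({3})};
      c10::List<at::Tensor> boxed(unboxed);
      total_numel(unboxed);  // unboxed path (c10::ArrayRef)
      total_numel(boxed);    // boxed path (c10::List)
    }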
phivenv/Lib/site-packages/torch/include/ATen/core/IListRef_inl.h ADDED
@@ -0,0 +1,203 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/List.h>
4
+ #include <ATen/core/Tensor.h>
5
+
6
+ namespace at {
7
+ class Tensor;
8
+ class OptionalTensorRef;
9
+ }
10
+
11
+
12
+ namespace c10::detail {
13
+
14
+ /*
15
+ * Specializations of `IListRefTagImplBase` that implement the default
16
+ * implementation for `IListRefTag::Unboxed`.
17
+ */
18
+ template <typename T, typename ListElemT>
19
+ class IListRefTagImplBase<IListRefTag::Unboxed, T, ListElemT> {
20
+ public:
21
+ using elem_type = ListElemT;
22
+ using list_type = ArrayRef<elem_type>;
23
+
24
+ /*
25
+ * These `unwrap` static methods unwraps the inner containers out
26
+ * of `IListRef<T>` (and `IListRefIterator<T>`). They are required when
27
+ * the macro `TORCH_ILISTREF_UNWRAP` is called.
28
+ */
29
+ static const list_type& unwrap(const IListRef<T>& ilist) {
30
+ return ilist.payload_.unboxed;
31
+ }
32
+
33
+ static typename list_type::const_iterator& unwrap(IListRefIterator<T>& it) {
34
+ return it.payload_.unboxed_iterator;
35
+ }
36
+
37
+ static const typename list_type::const_iterator& unwrap(
38
+ const IListRefIterator<T>& it) {
39
+ return it.payload_.unboxed_iterator;
40
+ }
41
+
42
+ /*
43
+ * We have these function (besides the `unwrap`s above) because the
44
+ * implementation for both `IListRef::operator[]` and `IListRefIterator::operator*`
45
+ * weren't syntatically equal for the existing tags at the time
46
+ * (`Unboxed` and `Boxed`).
47
+ */
48
+ static IListRefConstRef<T> front(const list_type& lst) {
49
+ return lst.front();
50
+ }
51
+
52
+ static IListRefConstRef<T> iterator_get(
53
+ const typename list_type::const_iterator& it) {
54
+ return *it;
55
+ }
56
+ };
57
+
58
+ /*
59
+ * Specializations of `IListRefTagImplBase` that implement the default
60
+ * implementation for `IListRefTag::Boxed`.
61
+ */
62
+ template <typename T, typename ListElemT>
63
+ class IListRefTagImplBase<IListRefTag::Boxed, T, ListElemT> {
64
+ public:
65
+ using elem_type = ListElemT;
66
+ using list_type = List<elem_type>;
67
+
68
+ static const list_type& unwrap(const IListRef<T>& ilist) {
69
+ return *ilist.payload_.boxed;
70
+ }
71
+
72
+ static typename list_type::const_iterator& unwrap(IListRefIterator<T>& it) {
73
+ return it.payload_.boxed_iterator;
74
+ }
75
+
76
+ static const typename list_type::const_iterator& unwrap(
77
+ const IListRefIterator<T>& it) {
78
+ return it.payload_.boxed_iterator;
79
+ }
80
+
81
+ static IListRefConstRef<T> front(const list_type& lst) {
82
+ return lst[0];
83
+ }
84
+
85
+ static IListRefConstRef<T> iterator_get(
86
+ const typename list_type::const_iterator& it) {
87
+ return (*it).get().toTensor();
88
+ }
89
+ };
90
+
91
+ /*
92
+ * Specializations of `IListRefTagImplBase` that implement the default
93
+ * implementation for `IListRefTag::Materialized`.
94
+ */
95
+ template <typename T>
96
+ class IListRefTagImplBase<IListRefTag::Materialized, T, MaterializedIListRefElem<T>> {
97
+ public:
98
+ using elem_type = MaterializedIListRefElem<T>;
99
+ using list_type = MaterializedIListRef<T>;
100
+
101
+ static const list_type& unwrap(const IListRef<T>& ilist) {
102
+ return *ilist.payload_.materialized;
103
+ }
104
+
105
+ static typename list_type::const_iterator& unwrap(IListRefIterator<T>& it) {
106
+ return it.payload_.materialized_iterator;
107
+ }
108
+
109
+ static const typename list_type::const_iterator& unwrap(
110
+ const IListRefIterator<T>& it) {
111
+ return it.payload_.materialized_iterator;
112
+ }
113
+
114
+ static IListRefConstRef<T> front(const list_type& lst) {
115
+ return lst[0];
116
+ }
117
+
118
+ static IListRefConstRef<T> iterator_get(
119
+ const typename list_type::const_iterator& it) {
120
+ return *it;
121
+ }
122
+ };
123
+
124
+ /*
125
+ * [Note: ITensorListRef]
126
+ * Specializations necessary for `IListRef<at::Tensor>` type.
127
+ *
128
+ * Since the default implementations are usually done with supporting
129
+ * `Tensor` in mind, we only have to inherit from the base implementations.
130
+ */
131
+ template <>
132
+ class IListRefTagImpl<IListRefTag::Unboxed, at::Tensor>
133
+ : public IListRefTagImplBase<IListRefTag::Unboxed, at::Tensor> {};
134
+
135
+ template <>
136
+ class IListRefTagImpl<IListRefTag::Boxed, at::Tensor>
137
+ : public IListRefTagImplBase<IListRefTag::Boxed, at::Tensor> {};
138
+
139
+ template <>
140
+ class IListRefTagImpl<IListRefTag::Materialized, at::Tensor>
141
+ : public IListRefTagImplBase<
142
+ IListRefTag::Materialized,
143
+ at::Tensor,
144
+ MaterializedIListRefElem<at::Tensor>> {};
145
+
146
+ /*
147
+ * [Note: IOptTensorListRef]
148
+ * Specializations necessary for `IListRef<at::OptionalTensorRef>` type.
149
+ *
150
+ * We can't get an `at::OptionalTensorRef` directly from an instance of
151
+ * `List<optional<Tensor>>` (the type that corresponds to the boxed world).
152
+ *
153
+ * So, the default implementation won't help us. Thus, we have to implement
154
+ * this method ourselves.
155
+ */
156
+ template <>
157
+ class IListRefTagImpl<IListRefTag::Unboxed, at::OptionalTensorRef>
158
+ : public IListRefTagImplBase<IListRefTag::Unboxed, at::OptionalTensorRef> {};
159
+
160
+ template <>
161
+ class IListRefTagImpl<IListRefTag::Boxed, at::OptionalTensorRef>
162
+ : public IListRefTagImplBase<IListRefTag::Boxed, at::OptionalTensorRef, std::optional<at::Tensor>> {
163
+
164
+ public:
165
+ /*
166
+ * Given an instance of the types corresponding to the `Boxed` tag, we override
167
+ * the default implementation, so that we can return a `at::OptionalTensorRef`.
168
+ */
169
+ static IListRefConstRef<at::OptionalTensorRef> iterator_get(
170
+ const typename list_type::const_iterator& it) {
171
+ C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdangling-reference")
172
+ const auto& ivalue = (*it).get();
173
+ C10_DIAGNOSTIC_POP()
174
+ if (!ivalue.isNone()) {
175
+ const auto& tensor = ivalue.toTensor();
176
+ return (tensor.defined()) ? tensor : at::OptionalTensorRef{};
177
+ }
178
+ return {};
179
+ }
180
+ };
181
+
182
+ template <>
183
+ class IListRefTagImpl<IListRefTag::Materialized, at::OptionalTensorRef>
184
+ : public IListRefTagImplBase<
185
+ IListRefTag::Materialized,
186
+ at::OptionalTensorRef,
187
+ MaterializedIListRefElem<at::OptionalTensorRef>> {};
188
+
189
+ } // namespace c10::detail
190
+
191
+
192
+ namespace at {
193
+
194
+ // [Note: ITensorListRef]
195
+ using ITensorListRef = c10::IListRef<at::Tensor>;
196
+ using ITensorListRefIterator = c10::IListRefIterator<at::Tensor>;
197
+ using MaterializedITensorListRef = c10::detail::MaterializedIListRef<at::Tensor>;
198
+ // [Note: IOptTensorListRef]
199
+ using IOptTensorListRef = c10::IListRef<at::OptionalTensorRef>;
200
+ using IOptTensorListRefIterator = c10::IListRefIterator<at::OptionalTensorRef>;
201
+ using MaterializedIOptTensorListRef = c10::detail::MaterializedIListRef<at::OptionalTensorRef>;
202
+
203
+ } // namespace at
phivenv/Lib/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h ADDED
@@ -0,0 +1,111 @@
+ #pragma once
+
+ // The legacy mechanism for dispatching operators in ATen is a Type
+ // object, which is essentially a giant virtual dispatch table
+ // for every operation we support dynamically dispatching over.
+ //
+ // This has been deprecated in favor of ATenDispatch, and in the future,
+ // c10 dispatcher.
+ // TODO: Clean up what remains here
+
+ #include <c10/core/impl/LocalDispatchKeySet.h>
+
+ namespace at {
+
+ // A RAII, thread local (!) guard that will disable dispatch to variable
+ // handler.
+ //
+ // NOTE [ Treating Variables as non-Variables in type dispatch ]
+ //
+ // What exactly does AutoDispatchBelowAutograd do? The short answer is, it causes
+ // dispatches on ATen functions to go to the non-variable implementation,
+ // bypassing autograd handling (and also profiling and tracing).
+ //
+ // To understand why this guard exists, it's helpful to understand the history
+ // behind how Variable was implemented. Previously, Variables were implemented
+ // as a wrapper on Tensors; so the act of processing a Variable involved
+ // unwrapping the underlying Tensor, and then calling the underlying base
+ // operation on /that/ operation
+ //
+ // However, after the Variable/Tensor merge, there is no concept of unwrapping
+ // a tensor anymore. If you just call the operation on the same variable
+ // again inside your VariableType handler, you'll dispatch back to
+ // VariableType, which is not what we want.
+ //
+ // The solution to the above problem is to add `at::AutoDispatchBelowAutograd`, which
+ // when enabled will cause `legacyTensorType()` and `getType()` to always return
+ // non-Variable type, even if the tensor being called on is a variable.
+
+ /* Note [AutoDispatchBelowAutograd]
+ * AutoDispatchBelowAutograd is **INTERNAL ONLY** that it should be used
+ * for kernel implementations and customized C++ kernels.
+ * If you are looking for a guard to run workload in inference mode, please use
+ * c10::InferenceMode RAII which is user facing API.
+ * In the past AutoDispatchBelowAutograd(or its old version AutoNonVariableTypeMode)
+ * was used in the user code for inference-only workload, this was under risk of
+ * producing wrong results silently in some edge cases. For example:
+ * ```
+ * torch::Tensor s = torch::ones({1, 2, 3}).set_requires_grad(true);
+ * torch::Tensor out = s * s;
+ * {
+ * at::AutoDispatchBelowAutograd guard;
+ * s.add_(1); // Skips version bump on `s`.
+ * }
+ * // WRONG GRADIENT! s.grad() are now computed using `s` value after the
+ * // inplace update.
+ * out.backward(torch::ones_like(out));
+ * ```
+ * Users should use `c10::InferenceMode` here so that it'll properly throw an
+ * error saying "one of the variables needed for gradient computation has be modified."
+ */
+ struct TORCH_API AutoDispatchBelowAutograd {
+ AutoDispatchBelowAutograd() :
+ autograd_guard_(c10::autograd_dispatch_keyset) {
+ }
+
+ // disable all autograd dispatch keys
+ c10::impl::ExcludeDispatchKeyGuard autograd_guard_;
+ };
+
+ // TODO: AutoNonVariableTypeMode should be removed in release 1.10.
+ struct TORCH_API AutoNonVariableTypeMode {
+ AutoNonVariableTypeMode(bool enabled = true) :
+ autograd_guard_(c10::autograd_dispatch_keyset) {
+ TORCH_WARN_ONCE("AutoNonVariableTypeMode is deprecated and will be removed in 1.10 release. "
+ "For kernel implementations please use AutoDispatchBelowADInplaceOrView instead, "
+ "If you are looking for a user facing API to enable running your inference-only "
+ "workload, please use c10::InferenceMode. Using AutoDispatchBelowADInplaceOrView in user code "
+ "is under risk of producing silent wrong result in some edge cases. "
+ "See Note [AutoDispatchBelowAutograd] for more details.");
+ TORCH_INTERNAL_ASSERT(enabled);
+ }
+
+ // disable all autograd dispatch keys
+ c10::impl::ExcludeDispatchKeyGuard autograd_guard_;
+ };
+
+ struct TORCH_API AutoDispatchSkipFunctionalize {
+ AutoDispatchSkipFunctionalize() :
+ dispatch_key_guard_(c10::DispatchKeySet(c10::DispatchKey::Functionalize)) {
+ }
+ c10::impl::ExcludeDispatchKeyGuard dispatch_key_guard_;
+ };
+
+ /* Note [AutoDispatchBelowADInplaceOrView]
+ * AutoDispatchBelowADInplaceOrView is equivalent to AutoNonVariableTypeMode
+ * before we split inplace & view ops out of VariableType kernel.
+ * Note this guard is used in VariableType kernels for functional ops
+ * as well as ADInplaceOrView kernels for inplace/view ops to enforce the
+ * Invariant:
+ * Once you are in VariableType/ADInplaceOrView kernel for an op,
+ * you never go back to a kernel on same dispatch key until
+ * you finish the current op.
+ */
+ struct TORCH_API AutoDispatchBelowADInplaceOrView {
+ AutoDispatchBelowADInplaceOrView() :
+ dispatch_key_guard_(c10::autograd_dispatch_keyset_with_ADInplaceOrView) {
+ }
+ // disable Autograd & ADInplaceOrView dispatch keys
+ c10::impl::ExcludeDispatchKeyGuard dispatch_key_guard_;
+ };
+ } // namespace at
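The notes above mark these guards as internal-only and point user code at c10::InferenceMode instead. A minimal sketch of the recommended user-facing pattern (not part of this commit; assumes an ordinary libtorch build):

    #include <ATen/ATen.h>
    #include <c10/core/InferenceMode.h>

    at::Tensor inference_only(const at::Tensor& input) {
      // RAII guard: inside this scope autograd handling is skipped safely,
      // unlike the internal-only dispatch-key guards declared above.
      c10::InferenceMode guard;
      return input.mul(2);
    }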
phivenv/Lib/site-packages/torch/include/ATen/core/List.h ADDED
@@ -0,0 +1,491 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/ivalue_to.h>
4
+ #include <ATen/core/jit_type_base.h>
5
+ #include <c10/macros/Macros.h>
6
+ #include <c10/macros/Export.h>
7
+ #include <c10/util/TypeTraits.h>
8
+ #include <c10/util/TypeList.h>
9
+ #include <c10/util/intrusive_ptr.h>
10
+ #include <c10/util/ArrayRef.h>
11
+ #include <optional>
12
+ #include <vector>
13
+
14
+ namespace at {
15
+ class Tensor;
16
+ }
17
+ namespace c10 {
18
+ struct IValue;
19
+ template<class T> class List;
20
+ struct Type;
21
+
22
+ namespace detail {
23
+
24
+ struct ListImpl final : public c10::intrusive_ptr_target {
25
+ using list_type = std::vector<IValue>;
26
+
27
+ explicit TORCH_API ListImpl(list_type list_, TypePtr elementType_);
28
+
29
+ list_type list;
30
+
31
+ TypePtr elementType;
32
+
33
+ intrusive_ptr<ListImpl> copy() const {
34
+ return make_intrusive<ListImpl>(list, elementType);
35
+ }
36
+ friend TORCH_API bool operator==(const ListImpl& lhs, const ListImpl& rhs);
37
+ };
38
+ }
39
+
40
+ namespace impl {
41
+
42
+ template<class T, class Iterator> class ListIterator;
43
+
44
+ template<class T, class Iterator> class ListElementReference;
45
+
46
+ template<class T, class Iterator>
47
+ void swap(ListElementReference<T, Iterator>&& lhs, ListElementReference<T, Iterator>&& rhs) noexcept;
48
+
49
+ template<class T, class Iterator>
50
+ bool operator==(const ListElementReference<T, Iterator>& lhs, const T& rhs);
51
+
52
+ template<class T, class Iterator>
53
+ bool operator==(const T& lhs, const ListElementReference<T, Iterator>& rhs);
54
+
55
+ template<class T>
56
+ struct ListElementConstReferenceTraits {
57
+ // In the general case, we use IValue::to().
58
+ using const_reference = typename c10::detail::ivalue_to_const_ref_overload_return<T>::type;
59
+ };
60
+
61
+ // There is no to() overload for std::optional<std::string>.
62
+ template<>
63
+ struct ListElementConstReferenceTraits<std::optional<std::string>> {
64
+ using const_reference = std::optional<std::reference_wrapper<const std::string>>;
65
+ };
66
+
67
+ template<class T, class Iterator>
68
+ class ListElementReference final {
69
+ public:
70
+ operator std::conditional_t<
71
+ std::is_reference_v<typename c10::detail::
72
+ ivalue_to_const_ref_overload_return<T>::type>,
73
+ const T&,
74
+ T>() const;
75
+
76
+ ListElementReference& operator=(T&& new_value) &&;
77
+
78
+ ListElementReference& operator=(const T& new_value) &&;
79
+
80
+ // assigning another ref to this assigns the underlying value
81
+ ListElementReference& operator=(ListElementReference&& rhs) && noexcept;
82
+
83
+ const IValue& get() const& {
84
+ return *iterator_;
85
+ }
86
+
87
+ friend void swap<T, Iterator>(ListElementReference&& lhs, ListElementReference&& rhs) noexcept;
88
+
89
+ ListElementReference(const ListElementReference&) = delete;
90
+ ListElementReference& operator=(const ListElementReference&) = delete;
91
+ ~ListElementReference() = default;
92
+
93
+ private:
94
+ ListElementReference(Iterator iter)
95
+ : iterator_(iter) {}
96
+
97
+ // allow moving, but only our friends (i.e. the List class) can move us
98
+ ListElementReference(ListElementReference&&) noexcept = default;
99
+ ListElementReference& operator=(ListElementReference&& rhs) & noexcept {
100
+ iterator_ = std::move(rhs.iterator_);
101
+ return *this;
102
+ }
103
+
104
+ friend class List<T>;
105
+ friend class ListIterator<T, Iterator>;
106
+
107
+ Iterator iterator_;
108
+ };
109
+
110
+ // this wraps vector::iterator to make sure user code can't rely
111
+ // on it being the type of the underlying vector.
112
+ template <class T, class Iterator>
113
+ class ListIterator final {
114
+ public:
115
+ // C++17 friendly std::iterator implementation
116
+ using iterator_category = std::random_access_iterator_tag;
117
+ using value_type = T;
118
+ using difference_type = std::ptrdiff_t;
119
+ using pointer = T*;
120
+ using reference = ListElementReference<T, Iterator>;
121
+
122
+ explicit ListIterator() = default;
123
+ ~ListIterator() = default;
124
+
125
+ ListIterator(const ListIterator&) = default;
126
+ ListIterator(ListIterator&&) noexcept = default;
127
+ ListIterator& operator=(const ListIterator&) = default;
128
+ ListIterator& operator=(ListIterator&&) noexcept = default;
129
+
130
+ ListIterator& operator++() {
131
+ ++iterator_;
132
+ return *this;
133
+ }
134
+
135
+ ListIterator operator++(int) {
136
+ ListIterator copy(*this);
137
+ ++*this;
138
+ return copy;
139
+ }
140
+
141
+ ListIterator& operator--() {
142
+ --iterator_;
143
+ return *this;
144
+ }
145
+
146
+ ListIterator operator--(int) {
147
+ ListIterator copy(*this);
148
+ --*this;
149
+ return copy;
150
+ }
151
+
152
+ ListIterator& operator+=(typename List<T>::size_type offset) {
153
+ iterator_ += offset;
154
+ return *this;
155
+ }
156
+
157
+ ListIterator& operator-=(typename List<T>::size_type offset) {
158
+ iterator_ -= offset;
159
+ return *this;
160
+ }
161
+
162
+ ListIterator operator+(typename List<T>::size_type offset) const {
163
+ return ListIterator{iterator_ + offset};
164
+ }
165
+
166
+ ListIterator operator-(typename List<T>::size_type offset) const {
167
+ return ListIterator{iterator_ - offset};
168
+ }
169
+
170
+ friend difference_type operator-(const ListIterator& lhs, const ListIterator& rhs) {
171
+ return lhs.iterator_ - rhs.iterator_;
172
+ }
173
+
174
+ ListElementReference<T, Iterator> operator*() const {
175
+ return {iterator_};
176
+ }
177
+
178
+ ListElementReference<T, Iterator> operator[](typename List<T>::size_type offset) const {
179
+ return {iterator_ + offset};
180
+ }
181
+
182
+ private:
183
+ explicit ListIterator(Iterator iterator): iterator_(std::move(iterator)) {}
184
+
185
+ Iterator iterator_;
186
+
187
+ friend bool operator==(const ListIterator& lhs, const ListIterator& rhs) {
188
+ return lhs.iterator_ == rhs.iterator_;
189
+ }
190
+
191
+ friend bool operator!=(const ListIterator& lhs, const ListIterator& rhs) {
192
+ return !(lhs == rhs);
193
+ }
194
+
195
+ friend bool operator<(const ListIterator& lhs, const ListIterator& rhs) {
196
+ return lhs.iterator_ < rhs.iterator_;
197
+ }
198
+
199
+ friend bool operator<=(const ListIterator& lhs, const ListIterator& rhs) {
200
+ return lhs.iterator_ <= rhs.iterator_;
201
+ }
202
+
203
+ friend bool operator>(const ListIterator& lhs, const ListIterator& rhs) {
204
+ return lhs.iterator_ > rhs.iterator_;
205
+ }
206
+
207
+ friend bool operator>=(const ListIterator& lhs, const ListIterator& rhs) {
208
+ return lhs.iterator_ >= rhs.iterator_;
209
+ }
210
+
211
+ friend class ListIterator<T, typename c10::detail::ListImpl::list_type::iterator>;
212
+ friend class List<T>;
213
+ };
214
+
215
+ template<class T> List<T> toTypedList(List<IValue> list);
216
+ template<class T> List<IValue> toList(List<T>&& list);
217
+ template<class T> List<IValue> toList(const List<T>& list);
218
+ const IValue* ptr_to_first_element(const List<IValue>& list);
219
+ }
220
+
221
+ /**
222
+ * An object of this class stores a list of values of type T.
223
+ *
224
+ * This is a pointer type. After a copy, both Lists
225
+ * will share the same storage:
226
+ *
227
+ * > List<int> a;
228
+ * > List<int> b = a;
229
+ * > b.push_back(3);
230
+ * > ASSERT(3 == a.get(0));
231
+ *
232
+ * We use this class in the PyTorch kernel API instead of
233
+ * std::vector<T>, because that allows us to do optimizations
234
+ * and switch out the underlying list implementation without
235
+ * breaking backwards compatibility for the kernel API.
236
+ */
237
+ template<class T>
238
+ // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
239
+ class List final {
240
+ private:
241
+ // This is an intrusive_ptr because List is a pointer type.
242
+ // Invariant: This will never be a nullptr, there will always be a valid
243
+ // ListImpl.
244
+ c10::intrusive_ptr<c10::detail::ListImpl> impl_;
245
+
246
+ using internal_reference_type = impl::ListElementReference<T, typename c10::detail::ListImpl::list_type::iterator>;
247
+ using internal_const_reference_type = typename impl::ListElementConstReferenceTraits<T>::const_reference;
248
+
249
+ public:
250
+ using value_type = T;
251
+ using size_type = typename c10::detail::ListImpl::list_type::size_type;
252
+ using iterator = impl::ListIterator<T, typename c10::detail::ListImpl::list_type::iterator>;
253
+ using const_iterator = impl::ListIterator<T, typename c10::detail::ListImpl::list_type::iterator>;
254
+ using reverse_iterator = impl::ListIterator<T, typename c10::detail::ListImpl::list_type::reverse_iterator>;
255
+
256
+ /**
257
+ * Constructs an empty list.
258
+ */
259
+ explicit List();
260
+
261
+ /**
262
+ * Constructs a list with some initial values.
263
+ * Example:
264
+ * List<int> a({2, 3, 4});
265
+ */
266
+ List(std::initializer_list<T> initial_values);
267
+ explicit List(ArrayRef<T> initial_values);
268
+
269
+ /**
270
+ * Create a generic list with runtime type information.
271
+ * This only works for c10::impl::GenericList and is not part of the public API
272
+ * but only supposed to be used internally by PyTorch.
273
+ */
274
+ explicit List(TypePtr elementType);
275
+
276
+ List(const List&) = default;
277
+ List& operator=(const List&) = default;
278
+ ~List() = default;
279
+
280
+ /**
281
+ * Create a new List pointing to a deep copy of the same data.
282
+ * The List returned is a new list with separate storage.
283
+ * Changes in it are not reflected in the original list or vice versa.
284
+ */
285
+ List copy() const;
286
+
287
+ /**
288
+ * Returns the element at specified location pos, with bounds checking.
289
+ * If pos is not within the range of the container, an exception of type std::out_of_range is thrown.
290
+ */
291
+ internal_const_reference_type get(size_type pos) const;
292
+
293
+ /**
294
+ * Moves out the element at the specified location pos and returns it, with bounds checking.
295
+ * If pos is not within the range of the container, an exception of type std::out_of_range is thrown.
296
+ * The list contains an invalid element at position pos afterwards. Any operations
297
+ * on it before re-setting it are invalid.
298
+ */
299
+ value_type extract(size_type pos) const;
300
+
301
+ /**
302
+ * Returns a reference to the element at specified location pos, with bounds checking.
303
+ * If pos is not within the range of the container, an exception of type std::out_of_range is thrown.
304
+ *
305
+ * You cannot store the reference, but you can read it and assign new values to it:
306
+ *
307
+ * List<int64_t> list = ...;
308
+ * list[2] = 5;
309
+ * int64_t v = list[1];
310
+ */
311
+ internal_const_reference_type operator[](size_type pos) const;
312
+
313
+ internal_reference_type operator[](size_type pos);
314
+
315
+ /**
316
+ * Assigns a new value to the element at location pos.
317
+ */
318
+ void set(size_type pos, const value_type& value) const;
319
+
320
+ /**
321
+ * Assigns a new value to the element at location pos.
322
+ */
323
+ void set(size_type pos, value_type&& value) const;
324
+
325
+ /**
326
+ * Returns an iterator to the first element of the container.
327
+ * If the container is empty, the returned iterator will be equal to end().
328
+ */
329
+ iterator begin() const;
330
+
331
+ /**
332
+ * Returns an iterator to the element following the last element of the container.
333
+ * This element acts as a placeholder; attempting to access it results in undefined behavior.
334
+ */
335
+ iterator end() const;
336
+
337
+ /**
338
+ * Checks if the container has no elements.
339
+ */
340
+ bool empty() const;
341
+
342
+ /**
343
+ * Returns the number of elements in the container
344
+ */
345
+ size_type size() const;
346
+
347
+ /**
348
+ * Increase the capacity of the vector to a value that's greater or equal to new_cap.
349
+ */
350
+ void reserve(size_type new_cap) const;
351
+
352
+ /**
353
+ * Erases all elements from the container. After this call, size() returns zero.
354
+ * Invalidates any references, pointers, or iterators referring to contained elements. Any past-the-end iterators are also invalidated.
355
+ */
356
+ void clear() const;
357
+
358
+ /**
359
+ * Inserts value before pos.
360
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
361
+ */
362
+ iterator insert(iterator pos, const T& value) const;
363
+
364
+ /**
365
+ * Inserts value before pos.
366
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
367
+ */
368
+ iterator insert(iterator pos, T&& value) const;
369
+
370
+ /**
371
+ * Inserts a new element into the container directly before pos.
372
+ * The new element is constructed with the given arguments.
373
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
374
+ */
375
+ template<class... Args>
376
+ iterator emplace(iterator pos, Args&&... value) const;
377
+
378
+ /**
379
+ * Appends the given element value to the end of the container.
380
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
381
+ */
382
+ void push_back(const T& value) const;
383
+
384
+ /**
385
+ * Appends the given element value to the end of the container.
386
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
387
+ */
388
+ void push_back(T&& value) const;
389
+
390
+ /**
391
+ * Appends the given list to the end of the container. Uses at most one memory allocation.
392
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
393
+ */
394
+ void append(List<T> lst) const;
395
+
396
+ /**
397
+ * Appends the given element value to the end of the container.
398
+ * The new element is constructed with the given arguments.
399
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
400
+ */
401
+ template<class... Args>
402
+ void emplace_back(Args&&... args) const;
403
+
404
+ /**
405
+ * Removes the element at pos.
406
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
407
+ */
408
+ iterator erase(iterator pos) const;
409
+
410
+ /**
411
+ * Removes the elements in the range [first, last).
412
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
413
+ */
414
+ iterator erase(iterator first, iterator last) const;
415
+
416
+ /**
417
+ * Removes the last element of the container.
418
+ * Calling pop_back on an empty container is undefined.
419
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
420
+ */
421
+ void pop_back() const;
422
+
423
+ /**
424
+ * Resizes the container to contain count elements.
425
+ * If the current size is less than count, additional default-inserted elements are appended.
426
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
427
+ */
428
+ void resize(size_type count) const;
429
+
430
+ /**
431
+ * Resizes the container to contain count elements.
432
+ * If the current size is less than count, additional copies of value are appended.
433
+ * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated.
434
+ */
435
+ void resize(size_type count, const T& value) const;
436
+
437
+ /**
438
+ * Value equality comparison. This function implements Python-like semantics for
439
+ * equality: two lists with the same identity (e.g. same pointer) trivially
440
+ * compare equal, otherwise each element is compared for equality.
441
+ */
442
+ template <class T_>
443
+ friend bool operator==(const List<T_>& lhs, const List<T_>& rhs);
444
+
445
+ template <class T_>
446
+ friend bool operator!=(const List<T_>& lhs, const List<T_>& rhs);
447
+
448
+ /**
449
+ * Identity comparison. Returns true if and only if `rhs` represents the same
450
+ * List object as `this`.
451
+ */
452
+ bool is(const List<T>& rhs) const;
453
+
454
+ std::vector<T> vec() const;
455
+
456
+ /**
457
+ * Returns the number of Lists currently pointing to this same list.
458
+ * If this is the only instance pointing to this list, returns 1.
459
+ */
460
+ // TODO Test use_count
461
+ size_t use_count() const;
462
+
463
+ TypePtr elementType() const;
464
+
465
+ // See [unsafe set type] for why this exists.
466
+ void unsafeSetElementType(TypePtr t);
467
+
468
+ private:
469
+ explicit List(c10::intrusive_ptr<c10::detail::ListImpl>&& elements);
470
+ explicit List(const c10::intrusive_ptr<c10::detail::ListImpl>& elements);
471
+ friend struct IValue;
472
+ template<class T_> friend List<T_> impl::toTypedList(List<IValue>);
473
+ template<class T_> friend List<IValue> impl::toList(List<T_>&&);
474
+ template<class T_> friend List<IValue> impl::toList(const List<T_>&);
475
+ friend const IValue* impl::ptr_to_first_element(const List<IValue>& list);
476
+ };
477
+
478
+ namespace impl {
479
+ // GenericList is how IValue stores lists. It is, however, not part of the
480
+ // public API. Kernels should use Lists with concrete types instead
481
+ // (maybe except for some internal prim ops).
482
+ using GenericList = List<IValue>;
483
+
484
+ }
485
+ }
486
+
487
+ namespace torch {
488
+ template<class T> using List = c10::List<T>;
489
+ }
490
+
491
+ #include <ATen/core/List_inl.h> // IWYU pragma: keep
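A short sketch of the pointer semantics documented above (illustrative, not part of the header):

    #include <ATen/core/List.h>

    void list_semantics_demo() {
      c10::List<int64_t> a({2, 3, 4});
      c10::List<int64_t> b = a;         // shallow copy: a and b share storage
      b.push_back(5);                   // now a.get(3) == 5 and a.is(b) is true
      c10::List<int64_t> c = a.copy();  // deep copy: separate storage
      c.set(0, 42);                     // does not affect a or b
    }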
phivenv/Lib/site-packages/torch/include/ATen/core/List_inl.h ADDED
@@ -0,0 +1,353 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/jit_type_base.h>
4
+ #include <ATen/core/ivalue.h>
5
+
6
+ namespace c10 {
7
+
8
+ template<class T> decltype(auto) getTypePtr();
9
+ std::string toString(const Type& type);
10
+
11
+ template<class T>
12
+ List<T>::List(c10::intrusive_ptr<c10::detail::ListImpl>&& elements)
13
+ : impl_(std::move(elements)) {}
14
+
15
+ template<class T>
16
+ List<T>::List(const c10::intrusive_ptr<c10::detail::ListImpl>& elements)
17
+ : impl_(elements) {}
18
+
19
+ template<class T>
20
+ List<T>::List()
21
+ : List(make_intrusive<c10::detail::ListImpl>(
22
+ typename c10::detail::ListImpl::list_type(),
23
+ getTypePtr<T>())) {
24
+ static_assert(!std::is_same_v<T, IValue>, "This constructor is not valid for List<IValue>. Please use c10::impl::GenericList(elementType) instead.");
25
+ }
26
+
27
+ template<class T>
28
+ List<T>::List(ArrayRef<T> values)
29
+ : List(make_intrusive<c10::detail::ListImpl>(
30
+ typename c10::detail::ListImpl::list_type(),
31
+ getTypePtr<T>())) {
32
+ static_assert(!std::is_same_v<T, IValue>, "This constructor is not valid for List<IValue>. Please use c10::impl::GenericList(elementType).");
33
+ impl_->list.reserve(values.size());
34
+ for (const T& element : values) {
35
+ impl_->list.push_back(element);
36
+ }
37
+ }
38
+
39
+ template<class T>
40
+ List<T>::List(std::initializer_list<T> initial_values)
41
+ : List(ArrayRef<T>(initial_values)) {
42
+ static_assert(!std::is_same_v<T, IValue>, "This constructor is not valid for List<IValue>. Please use c10::impl::GenericList(elementType).");
43
+ }
44
+
45
+ template<class T>
46
+ List<T>::List(TypePtr elementType)
47
+ : List(make_intrusive<c10::detail::ListImpl>(
48
+ typename c10::detail::ListImpl::list_type(),
49
+ std::move(elementType))) {
50
+ static_assert(std::is_same_v<T, IValue> || std::is_same_v<T, c10::intrusive_ptr<ivalue::Future>>,
51
+ "This constructor is only valid for c10::impl::GenericList or List<Future>.");
52
+ }
53
+
54
+ namespace impl {
55
+ template<class T>
56
+ List<T> toTypedList(impl::GenericList list) {
57
+ // If there's other instances of the list (i.e. list.use_count() > 1), then we have to be invariant
58
+ // because upcasting would allow people to add types into the new list that would break the old list.
59
+ // However, if there aren't any other instances of this list (i.e. list.use_count() == 1), then we can
60
+ // allow upcasting. This can be a perf improvement since we can cast List<T> to List<optional<T>>
61
+ // without having to copy it. This is also used to provide backwards compatibility with some old models
62
+ // that serialized the index arguments to aten::index, aten::index_put, aten::index_put_ and aten::index_put_impl_
63
+ // as List<Tensor> before we changed that argument to be List<optional<Tensor>>. When deserializing, we
64
+ // have list.use_count() == 1 and can deserialize the List<Tensor> directly as List<optional<Tensor>>.
65
+ TORCH_CHECK(*list.impl_->elementType == *getTypePtr<T>()
66
+ || (list.use_count() == 1 && list.impl_->elementType->isSubtypeOf(*getTypePtr<T>()))
67
+ , "Tried to cast a List<", toString(*list.impl_->elementType), "> to a List<", toString(*getTypePtr<T>()), ">. Types mismatch.");
68
+ return List<T>(std::move(list.impl_));
69
+ }
70
+
71
+ template<class T>
72
+ impl::GenericList toList(List<T>&& list) {
73
+ return GenericList(std::move(list.impl_));
74
+ }
75
+ template<class T>
76
+ impl::GenericList toList(const List<T>& list) {
77
+ return GenericList(list.impl_);
78
+ }
79
+ }
80
+
81
+ template<class T>
82
+ List<T> List<T>::copy() const {
83
+ return List<T>(impl_->copy());
84
+ }
85
+
86
+ namespace detail {
87
+ template<class T>
88
+ T list_element_to(T element) {
89
+ return element;
90
+ }
91
+ template<class T>
92
+ T list_element_to(const IValue& element) {
93
+ return element.template to<T>();
94
+ }
95
+ template<class T>
96
+ T list_element_to(IValue&& element) {
97
+ return std::move(element).template to<T>();
98
+ }
99
+ template<class T>
100
+ struct ListElementFrom {
101
+ static IValue from(const T& element) {
102
+ return element;
103
+ }
104
+ static IValue from(T&& element) {
105
+ return std::move(element);
106
+ }
107
+ };
108
+ template<>
109
+ struct ListElementFrom<IValue> {
110
+ static const IValue& from(const IValue& element) {
111
+ return element;
112
+ }
113
+ static IValue&& from(IValue&& element) {
114
+ return std::move(element);
115
+ }
116
+ };
117
+ }
118
+
119
+ namespace impl {
120
+
121
+ template <class T, class Iterator>
122
+ ListElementReference<T, Iterator>::operator std::conditional_t<
123
+ std::is_reference_v<typename c10::detail::ivalue_to_const_ref_overload_return<
124
+ T>::type>,
125
+ const T&,
126
+ T>() const {
127
+ return iterator_->template to<T>();
128
+ }
129
+
130
+ template<class T, class Iterator>
131
+ ListElementReference<T, Iterator>& ListElementReference<T, Iterator>::operator=(T&& new_value) && {
132
+ *iterator_ = c10::detail::ListElementFrom<T>::from(std::move(new_value));
133
+ return *this;
134
+ }
135
+
136
+ template<class T, class Iterator>
137
+ ListElementReference<T, Iterator>& ListElementReference<T, Iterator>::operator=(const T& new_value) && {
138
+ *iterator_ = c10::detail::ListElementFrom<T>::from(new_value);
139
+ return *this;
140
+ }
141
+
142
+ template<class T, class Iterator>
143
+ ListElementReference<T, Iterator>& ListElementReference<T, Iterator>::operator=(ListElementReference<T, Iterator>&& rhs) && noexcept {
144
+ *iterator_ = *rhs.iterator_;
145
+ return *this;
146
+ }
147
+
148
+ template<class T, class Iterator>
149
+ void swap(ListElementReference<T, Iterator>&& lhs, ListElementReference<T, Iterator>&& rhs) noexcept {
150
+ std::swap(*lhs.iterator_, *rhs.iterator_);
151
+ }
152
+
153
+ template<class T, class Iterator>
154
+ bool operator==(const ListElementReference<T, Iterator>& lhs, const T& rhs) {
155
+ const T& lhs_tmp = lhs;
156
+ return lhs_tmp == rhs;
157
+ }
158
+
159
+ template<class T, class Iterator>
160
+ inline bool operator==(const T& lhs, const ListElementReference<T, Iterator>& rhs) {
161
+ return rhs == lhs;
162
+ }
163
+
164
+ template<class T>
165
+ inline typename ListElementConstReferenceTraits<T>::const_reference
166
+ list_element_to_const_ref(const IValue& element) {
167
+ return element.template to<T>();
168
+ }
169
+
170
+ template<>
171
+ inline typename ListElementConstReferenceTraits<std::optional<std::string>>::const_reference
172
+ list_element_to_const_ref<std::optional<std::string>>(const IValue& element) {
173
+ return element.toOptionalStringRef();
174
+ }
175
+
176
+ } // namespace impl
177
+
178
+ template<class T>
179
+ void List<T>::set(size_type pos, const value_type& value) const {
180
+ impl_->list.at(pos) = c10::detail::ListElementFrom<T>::from(value);
181
+ }
182
+
183
+ template<class T>
184
+ void List<T>::set(size_type pos, value_type&& value) const {
185
+ impl_->list.at(pos) = c10::detail::ListElementFrom<T>::from(std::move(value));
186
+ }
187
+
188
+ template<class T>
189
+ typename List<T>::internal_const_reference_type List<T>::get(size_type pos) const {
190
+ return operator[](pos);
191
+ }
192
+
193
+ template<class T>
194
+ typename List<T>::internal_const_reference_type List<T>::operator[](size_type pos) const {
195
+ return c10::impl::list_element_to_const_ref<T>(impl_->list.at(pos));
196
+ }
197
+
198
+ template<class T>
199
+ typename List<T>::internal_reference_type List<T>::operator[](size_type pos) {
200
+ static_cast<void>(impl_->list.at(pos)); // Throw the exception if it is out of range.
201
+ return {impl_->list.begin() + static_cast<typename decltype(impl_->list)::difference_type>(pos)};
202
+ }
203
+
204
+ template<class T>
205
+ typename List<T>::value_type List<T>::extract(size_type pos) const {
206
+ auto& elem = impl_->list.at(pos);
207
+ auto result = c10::detail::list_element_to<T>(std::move(elem));
208
+ // Reset the list element to a T() instead of None to keep it correctly typed
209
+ elem = c10::detail::ListElementFrom<T>::from(T{});
210
+ return result;
211
+ }
212
+
213
+ template<class T>
214
+ typename List<T>::iterator List<T>::begin() const {
215
+ return iterator(impl_->list.begin());
216
+ }
217
+
218
+ template<class T>
219
+ typename List<T>::iterator List<T>::end() const {
220
+ return iterator(impl_->list.end());
221
+ }
222
+
223
+ template<class T>
224
+ bool List<T>::empty() const {
225
+ return impl_->list.empty();
226
+ }
227
+
228
+ template<class T>
229
+ typename List<T>::size_type List<T>::size() const {
230
+ return impl_->list.size();
231
+ }
232
+
233
+ template<class T>
234
+ void List<T>::reserve(size_type new_cap) const {
235
+ impl_->list.reserve(new_cap);
236
+ }
237
+
238
+ template<class T>
239
+ void List<T>::clear() const {
240
+ impl_->list.clear();
241
+ }
242
+
243
+ template<class T>
244
+ typename List<T>::iterator List<T>::insert(iterator pos, const T& value) const {
245
+ return iterator { impl_->list.insert(pos.iterator_, c10::detail::ListElementFrom<T>::from(value)) };
246
+ }
247
+
248
+ template<class T>
249
+ typename List<T>::iterator List<T>::insert(iterator pos, T&& value) const {
250
+ return iterator { impl_->list.insert(pos.iterator_, c10::detail::ListElementFrom<T>::from(std::move(value))) };
251
+ }
252
+
253
+ template<class T>
254
+ template<class... Args>
255
+ typename List<T>::iterator List<T>::emplace(iterator pos, Args&&... value) const {
256
+ // TODO Use list_element_from?
257
+ return iterator { impl_->list.emplace(pos.iterator_, std::forward<Args>(value)...) };
258
+ }
259
+
260
+ template<class T>
261
+ void List<T>::push_back(const T& value) const {
262
+ impl_->list.push_back(c10::detail::ListElementFrom<T>::from(value));
263
+ }
264
+
265
+ template<class T>
266
+ void List<T>::push_back(T&& value) const {
267
+ impl_->list.push_back(c10::detail::ListElementFrom<T>::from(std::move(value)));
268
+ }
269
+
270
+ template<class T>
271
+ void List<T>::append(List<T> b) const {
272
+ if (b.use_count() == 1) {
273
+ impl_->list.insert(impl_->list.end(), make_move_iterator(b.impl_->list.begin()), make_move_iterator(b.impl_->list.end()));
274
+ } else {
275
+ impl_->list.insert(impl_->list.end(), b.impl_->list.begin(), b.impl_->list.end());
276
+ }
277
+ }
278
+
279
+ template<class T>
280
+ template<class... Args>
281
+ void List<T>::emplace_back(Args&&... args) const {
282
+ // TODO Use list_element_from?
283
+ impl_->list.push_back(T(std::forward<Args>(args)...));
284
+ }
285
+
286
+ template<class T>
287
+ typename List<T>::iterator List<T>::erase(iterator pos) const {
288
+ return iterator { impl_->list.erase(pos.iterator_) };
289
+ }
290
+
291
+ template<class T>
292
+ typename List<T>::iterator List<T>::erase(iterator first, iterator last) const {
293
+ return iterator { impl_->list.erase(first.iterator_, last.iterator_) };
294
+ }
295
+
296
+ template<class T>
297
+ void List<T>::pop_back() const {
298
+ impl_->list.pop_back();
299
+ }
300
+
301
+ template<class T>
302
+ void List<T>::resize(size_type count) const {
303
+ impl_->list.resize(count, T{});
304
+ }
305
+
306
+ template<class T>
307
+ void List<T>::resize(size_type count, const T& value) const {
308
+ impl_->list.resize(count, value);
309
+ }
310
+
311
+ template<class T>
312
+ bool operator==(const List<T>& lhs, const List<T>& rhs) {
313
+ // Lists with the same identity trivially compare equal.
314
+ if (lhs.impl_ == rhs.impl_) {
315
+ return true;
316
+ }
317
+
318
+ // Otherwise, just compare values directly.
319
+ return *lhs.impl_ == *rhs.impl_;
320
+ }
321
+
322
+ template<class T>
323
+ bool operator!=(const List<T>& lhs, const List<T>& rhs) {
324
+ return !(lhs == rhs);
325
+ }
326
+
327
+ template<class T>
328
+ bool List<T>::is(const List<T>& rhs) const {
329
+ return this->impl_ == rhs.impl_;
330
+ }
331
+
332
+ template<class T>
333
+ std::vector<T> List<T>::vec() const {
334
+ std::vector<T> result(begin(), end());
335
+ return result;
336
+ }
337
+
338
+ template<class T>
339
+ size_t List<T>::use_count() const {
340
+ return impl_.use_count();
341
+ }
342
+
343
+ template <class T>
344
+ TypePtr List<T>::elementType() const {
345
+ return impl_->elementType;
346
+ }
347
+
348
+ template <class T>
349
+ void List<T>::unsafeSetElementType(TypePtr t) {
350
+ impl_->elementType = std::move(t);
351
+ }
352
+
353
+ }
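The toTypedList/toList helpers above convert between a typed List<T> and the type-erased GenericList used by IValue; a rough round-trip sketch (illustrative, internal API):

    c10::List<int64_t> ints({1, 2, 3});
    // Erase the static element type; the runtime element type is kept in the ListImpl.
    c10::impl::GenericList generic = c10::impl::toList(std::move(ints));
    // Checked cast back; toTypedList TORCH_CHECKs that the element types match.
    c10::List<int64_t> typed = c10::impl::toTypedList<int64_t>(std::move(generic));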
phivenv/Lib/site-packages/torch/include/ATen/core/MT19937RNGEngine.h ADDED
@@ -0,0 +1,194 @@
1
+ #pragma once
2
+
3
+ #include <c10/util/irange.h>
4
+
5
+ // define constants like M_PI and C keywords for MSVC
6
+ #ifdef _MSC_VER
7
+ #ifndef _USE_MATH_DEFINES
8
+ #define _USE_MATH_DEFINES
9
+ #endif
10
+ #include <math.h>
11
+ #endif
12
+
13
+ #include <array>
14
+ #include <cmath>
15
+ #include <cstdint>
16
+
17
+ namespace at {
18
+
19
+ constexpr int MERSENNE_STATE_N = 624;
20
+ constexpr int MERSENNE_STATE_M = 397;
21
+ constexpr uint32_t MATRIX_A = 0x9908b0df;
22
+ constexpr uint32_t UMASK = 0x80000000;
23
+ constexpr uint32_t LMASK = 0x7fffffff;
24
+
25
+ /**
26
+ * Note [Mt19937 Engine implementation]
27
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
28
+ * Originally implemented in:
29
+ * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/MTARCOK/mt19937ar-cok.c
30
+ * and modified with C++ constructs. Moreover the state array of the engine
31
+ * has been modified to hold 32 bit uints instead of 64 bits.
32
+ *
33
+ * Note that we reimplemented mt19937 instead of using std::mt19937 because,
34
+ * at::mt19937 turns out to be faster in the pytorch codebase. PyTorch builds with -O2
35
+ * by default and following are the benchmark numbers (benchmark code can be found at
36
+ * https://github.com/syed-ahmed/benchmark-rngs):
37
+ *
38
+ * with -O2
39
+ * Time to get 100000000 philox randoms with at::uniform_real_distribution = 0.462759s
40
+ * Time to get 100000000 at::mt19937 randoms with at::uniform_real_distribution = 0.39628s
41
+ * Time to get 100000000 std::mt19937 randoms with std::uniform_real_distribution = 0.352087s
42
+ * Time to get 100000000 std::mt19937 randoms with at::uniform_real_distribution = 0.419454s
43
+ *
44
+ * std::mt19937 is faster when used in conjunction with std::uniform_real_distribution,
45
+ * however we can't use std::uniform_real_distribution because of this bug:
46
+ * http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524. Plus, even if we used
47
+ * std::uniform_real_distribution and filtered out the 1's, it is a different algorithm
48
+ * than what's in pytorch currently and that messes up the tests in tests_distributions.py.
49
+ * The other option, using std::mt19937 with at::uniform_real_distribution is a tad bit slower
50
+ * than at::mt19937 with at::uniform_real_distribution and hence, we went with the latter.
51
+ *
52
+ * Copyright notice:
53
+ * A C-program for MT19937, with initialization improved 2002/2/10.
54
+ * Coded by Takuji Nishimura and Makoto Matsumoto.
55
+ * This is a faster version by taking Shawn Cokus's optimization,
56
+ * Matthe Bellew's simplification, Isaku Wada's real version.
57
+ *
58
+ * Before using, initialize the state by using init_genrand(seed)
59
+ * or init_by_array(init_key, key_length).
60
+ *
61
+ * Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
62
+ * All rights reserved.
63
+ *
64
+ * Redistribution and use in source and binary forms, with or without
65
+ * modification, are permitted provided that the following conditions
66
+ * are met:
67
+ *
68
+ * 1. Redistributions of source code must retain the above copyright
69
+ * notice, this list of conditions and the following disclaimer.
70
+ *
71
+ * 2. Redistributions in binary form must reproduce the above copyright
72
+ * notice, this list of conditions and the following disclaimer in the
73
+ * documentation and/or other materials provided with the distribution.
74
+ *
75
+ * 3. The names of its contributors may not be used to endorse or promote
76
+ * products derived from this software without specific prior written
77
+ * permission.
78
+ *
79
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
80
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
81
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
82
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
83
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
84
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
85
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
86
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
87
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
88
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
89
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
90
+ *
91
+ *
92
+ * Any feedback is very welcome.
93
+ * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
94
+ * email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
95
+ */
96
+
97
+ /**
98
+ * mt19937_data_pod is used to get POD data in and out
99
+ * of mt19937_engine. Used in torch.get_rng_state and
100
+ * torch.set_rng_state functions.
101
+ */
102
+ struct mt19937_data_pod {
103
+ uint64_t seed_;
104
+ int left_;
105
+ bool seeded_;
106
+ uint32_t next_;
107
+ std::array<uint32_t, MERSENNE_STATE_N> state_;
108
+ };
109
+
110
+ class mt19937_engine {
111
+ public:
112
+
113
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
114
+ inline explicit mt19937_engine(uint64_t seed = 5489) {
115
+ init_with_uint32(seed);
116
+ }
117
+
118
+ inline mt19937_data_pod data() const {
119
+ return data_;
120
+ }
121
+
122
+ inline void set_data(const mt19937_data_pod& data) {
123
+ data_ = data;
124
+ }
125
+
126
+ inline uint64_t seed() const {
127
+ return data_.seed_;
128
+ }
129
+
130
+ inline bool is_valid() {
131
+ if ((data_.seeded_ == true)
132
+ && (data_.left_ > 0 && data_.left_ <= MERSENNE_STATE_N)
133
+ && (data_.next_ <= MERSENNE_STATE_N)) {
134
+ return true;
135
+ }
136
+ return false;
137
+ }
138
+
139
+ inline uint32_t operator()() {
140
+ if (--(data_.left_) == 0) {
141
+ next_state();
142
+ }
143
+ uint32_t y = *(data_.state_.data() + data_.next_++);
144
+ y ^= (y >> 11);
145
+ y ^= (y << 7) & 0x9d2c5680;
146
+ y ^= (y << 15) & 0xefc60000;
147
+ y ^= (y >> 18);
148
+
149
+ return y;
150
+ }
151
+
152
+ private:
153
+ mt19937_data_pod data_;
154
+
155
+ inline void init_with_uint32(uint64_t seed) {
156
+ data_.seed_ = seed;
157
+ data_.seeded_ = true;
158
+ data_.state_[0] = seed & 0xffffffff;
159
+ for (const auto j : c10::irange(1, MERSENNE_STATE_N)) {
160
+ data_.state_[j] = (1812433253 * (data_.state_[j-1] ^ (data_.state_[j-1] >> 30)) + j);
161
+ }
162
+ data_.left_ = 1;
163
+ data_.next_ = 0;
164
+ }
165
+
166
+ inline uint32_t mix_bits(uint32_t u, uint32_t v) {
167
+ return (u & UMASK) | (v & LMASK);
168
+ }
169
+
170
+ inline uint32_t twist(uint32_t u, uint32_t v) {
171
+ return (mix_bits(u,v) >> 1) ^ (v & 1 ? MATRIX_A : 0);
172
+ }
173
+
174
+ inline void next_state() {
175
+ uint32_t* p = data_.state_.data();
176
+ data_.left_ = MERSENNE_STATE_N;
177
+ data_.next_ = 0;
178
+
179
+ for(int j = MERSENNE_STATE_N - MERSENNE_STATE_M + 1; --j; p++) {
180
+ *p = p[MERSENNE_STATE_M] ^ twist(p[0], p[1]);
181
+ }
182
+
183
+ for(int j = MERSENNE_STATE_M; --j; p++) {
184
+ *p = p[MERSENNE_STATE_M - MERSENNE_STATE_N] ^ twist(p[0], p[1]);
185
+ }
186
+
187
+ *p = p[MERSENNE_STATE_M - MERSENNE_STATE_N] ^ twist(p[0], data_.state_[0]);
188
+ }
189
+
190
+ };
191
+
192
+ typedef mt19937_engine mt19937;
193
+
194
+ } // namespace at
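A small usage sketch (illustrative): construct the engine with a seed, call operator() for successive 32-bit draws, and use data()/set_data() to snapshot and restore the POD state, as torch.get_rng_state/torch.set_rng_state do:

    at::mt19937 gen(5489);                        // explicit default seed
    uint32_t r0 = gen();                          // first 32-bit draw
    at::mt19937_data_pod snapshot = gen.data();   // capture the engine state
    uint32_t r1 = gen();
    gen.set_data(snapshot);                       // restore; the next draw reproduces r1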
phivenv/Lib/site-packages/torch/include/ATen/core/NamedTensor.h ADDED
@@ -0,0 +1,143 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/Dimname.h>
4
+ #include <c10/core/TensorImpl.h>
5
+
6
+ namespace at {
7
+
8
+ class TensorBase;
9
+
10
+ // XXX: This file exists because TensorImpl is in c10, but Dimname is in ATen.
11
+ // Due to the c10/ATen library split, TensorImpl cannot depend on Dimname,
12
+ // so we have a couple of workarounds.
13
+ //
14
+ // In the long term, we'll move Dimname to c10 and everything in this file
15
+ // can be refactored out. The main blocker for that is that "c10::Symbol"
16
+ // actually exists outside of c10 and needs to be moved in.
17
+
18
+ // TensorImpl has a unique_ptr<NamedTensorMetaInterface> field.
19
+ // XXX: Ideally we would just put std::optional<vector<Dimname>> into TensorImpl.
20
+ //
21
+ // This class has an important invariant: there must be at least ONE
22
+ // non-wildcard
23
+ struct TORCH_API NamedTensorMeta final : public c10::NamedTensorMetaInterface {
24
+ // This enum is to remind people that the invariant on constructors is that
25
+ // the list of dimnames must have at least one non-wildcard
26
+ enum HAS_NON_WILDCARD {
27
+ HasNonWildcard
28
+ };
29
+
30
+ explicit NamedTensorMeta(HAS_NON_WILDCARD, DimnameList names)
31
+ : names_(names.vec()) {
32
+ check_invariants();
33
+ }
34
+ explicit NamedTensorMeta(HAS_NON_WILDCARD, std::vector<Dimname>&& names)
35
+ : names_(std::move(names)) {
36
+ check_invariants();
37
+ }
38
+
39
+ std::unique_ptr<c10::NamedTensorMetaInterface> clone() const override {
40
+ return std::make_unique<NamedTensorMeta>(HasNonWildcard, names_);
41
+ }
42
+
43
+ DimnameList names() const { return names_; }
44
+
45
+ // Used for an assertion in TensorImpl.h
46
+ int64_t slow_dim() const override {
47
+ return static_cast<int64_t>(names_.size());
48
+ }
49
+
50
+ void check_invariants() const {
51
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
52
+ std::any_of(names_.begin(), names_.end(), [](const Dimname& n) { return !n.isWildcard(); }));
53
+ }
54
+
55
+ void set_names(HAS_NON_WILDCARD, DimnameList new_names) {
56
+ TORCH_INTERNAL_ASSERT(new_names.size() == names_.size());
57
+ std::copy(new_names.begin(), new_names.end(), names_.begin());
58
+ check_invariants();
59
+ }
60
+
61
+ void set_names(HAS_NON_WILDCARD, std::vector<Dimname>&& new_names) {
62
+ TORCH_INTERNAL_ASSERT(new_names.size() == names_.size());
63
+ names_ = std::move(new_names);
64
+ check_invariants();
65
+ }
66
+
67
+ // INVARIANT: at least one Dimname is non-WILDCARD
68
+ std::vector<Dimname> names_;
69
+ };
70
+
71
+ // When NamesMode is disabled, then all operations ignore tensors' names fields.
72
+ // Concretely speaking, all tensors are treated as having nullopt names.
73
+ struct TORCH_API NamesMode {
74
+ static bool is_enabled();
75
+ static void set_enabled(bool enabled);
76
+ };
77
+
78
+
79
+ // A RAII, thread local (!) guard that enables or disables names upon
80
+ // construction, and sets it back to the original value upon destruction.
81
+ struct TORCH_API NoNamesGuard {
82
+ NoNamesGuard() : prev_mode(NamesMode::is_enabled()) {
83
+ NamesMode::set_enabled(false);
84
+ }
85
+ NoNamesGuard(const NoNamesGuard&) = delete;
86
+ NoNamesGuard(NoNamesGuard&&) = delete;
87
+ NoNamesGuard& operator=(const NoNamesGuard&) = delete;
88
+ NoNamesGuard& operator=(NoNamesGuard&&) = delete;
89
+ ~NoNamesGuard() {
90
+ if (initialized) {
91
+ reset();
92
+ }
93
+ }
94
+ void reset() {
95
+ TORCH_INTERNAL_ASSERT(initialized);
96
+ NamesMode::set_enabled(prev_mode);
97
+ }
98
+ private:
99
+ bool prev_mode;
100
+ bool initialized{true};
101
+ };
102
+
103
+ void check_names_valid_for(const TensorBase& tensor, DimnameList names);
104
+ void check_names_valid_for(size_t tensor_dim, DimnameList names);
105
+
106
+ // Sets the names of `tensor` to be `names`.
107
+ TORCH_API const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::optional<DimnameList> names);
108
+ TORCH_API const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::vector<Dimname>&& names, bool validate_names);
109
+
110
+ constexpr size_t kMaxNamedTensorDim = 64;
111
+
112
+ DimnameList default_names(size_t len);
113
+
114
+ namespace impl {
115
+
116
+ // Some helper functions on TensorImpl. Useful for working with names in TH.
117
+ // XXX: Ideally these would exist as methods on TensorImpl
118
+ TORCH_API void internal_set_names_inplace(TensorImpl* impl, std::optional<DimnameList> names, bool validate_names);
119
+ TORCH_API void internal_set_names_inplace(TensorImpl* impl, std::vector<Dimname>&& names, bool validate_names);
120
+
121
+ void check_names_valid_for(TensorImpl* impl, DimnameList names);
122
+
123
+ // Returns true if the tensor's names exist and are not all 'None'.
124
+ // Returns false if the tensor's names don't exist (were not allocated),
125
+ // or if all names are 'None'.
126
+ // We treat not-allocated-names the same as allocated names that are all 'None'.
127
+ TORCH_API bool has_names(const TensorImpl* impl);
128
+
129
+ // Returns the names of the tensor's dimensions.
130
+ // Unnamed tensors are treated as having 'None' in all dimension; this method
131
+ // would return a DimnameList of all 'None's for an unnamed tensor.
132
+ TORCH_API DimnameList get_names(const TensorImpl* impl);
133
+
134
+ // This is more of an implementation detail; one should use impl::get_names /
135
+ // Tensor::names() whenever possible because it provides a cleaner API.
136
+ // Returns the names of the tensor if they have been allocated; returns nullopt
137
+ // instead if the haven't been. The names of a tensor are not allocated if a
138
+ // tensor is constructed with names=None.
139
+ TORCH_API std::optional<DimnameList> get_opt_names(const TensorImpl* impl);
140
+
141
+ } // namespace impl
142
+
143
+ } // namespace at
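A sketch of the NoNamesGuard RAII pattern described above (illustrative):

    bool was_enabled = at::NamesMode::is_enabled();
    {
      at::NoNamesGuard guard;                     // disables names for this scope
      // ... ops here treat every tensor as unnamed ...
    }                                             // destructor restores the previous mode
    TORCH_INTERNAL_ASSERT(at::NamesMode::is_enabled() == was_enabled);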
phivenv/Lib/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h ADDED
@@ -0,0 +1,187 @@
1
+ #pragma once
2
+
3
+ #include <c10/core/ConstantSymNodeImpl.h>
4
+ #include <c10/core/SymNodeImpl.h>
5
+ #include <c10/macros/Export.h>
6
+ #include <c10/util/Exception.h>
7
+ #include <c10/util/intrusive_ptr.h>
8
+ #include <cstdint>
9
+ #include <optional>
10
+ #include <string>
11
+
12
+ namespace c10 {
13
+
14
+ // The motivating usecase for this is to represent the ragged size structure
15
+ // of a jagged tensor [B, [s_0, s_1, s_2], D] as a single integer j0. This
16
+ // allows us to simply return [B, j0, D] if someone queries for the size of our
17
+ // tensor.
18
+ //
19
+ // Morally we define comparison between two nested ints to return true if
20
+ // that comparison holds for all corresponding elements of the arrays they
21
+ // represent. Comparison between a nested int and a plain int is defined
22
+ // similarly.
23
+ //
24
+ // To simulate this desired behavior but also avoid the O(N) cost of checking,
25
+ // we associate each raggedness pattern with an integer "id" that can be used as
26
+ // a proxy to evaluate equality. We also constrain the range of values for this
27
+ // as to enable inequality checks.
28
+ //
29
+ // We also support a positive integer scalar "coeff" that is used for computing
30
+ // strides. For example given, a [B, j0, D] tensor, it can be strided in two
31
+ // different ways: [D * j0, D, 1] and [j0, 1, sum(j0)]. The coeff is used to
32
+ // differentiate the two cases.
33
+ //
34
+ // During tracing the strides of the outputs need to be a function of the size
35
+ // and strides of the inputs so it is important that NestedIntSymNode itself is
36
+ // able to express this.
37
+ class TORCH_API NestedIntSymNodeImpl : public SymNodeImpl {
38
+ public:
39
+ // CAUTION: you should probably not be constructing these directly; please
40
+ // the higher-level API in python instead (TODO: actually introduce that).
41
+ explicit NestedIntSymNodeImpl(int64_t val, int64_t coeff)
42
+ : val_(val), coeff_(coeff) {}
43
+
44
+ bool bool_() override {
45
+ return false;
46
+ }
47
+
48
+ bool is_int() override {
49
+ return true;
50
+ }
51
+
52
+ bool is_float() override {
53
+ return false;
54
+ }
55
+
56
+ bool is_bool() override {
57
+ return false;
58
+ }
59
+
60
+ bool is_nested_int() const override {
61
+ return true;
62
+ }
63
+
64
+ bool has_hint() override {
65
+ return true;
66
+ }
67
+
68
+ c10::SymNode wrap_int(int64_t num) override {
69
+ return SymNode(c10::make_intrusive<ConstantSymNodeImpl<int64_t>>(num));
70
+ }
71
+
72
+ int64_t guard_int(const char* file, int64_t line) override {
73
+ TORCH_CHECK(false);
74
+ }
75
+
76
+ double guard_float(const char* file, int64_t line) override {
77
+ TORCH_CHECK(false, "not a float");
78
+ }
79
+
80
+ bool guard_bool(const char* file, int64_t line) override {
81
+ TORCH_CHECK(false, "not a bool");
82
+ }
83
+
84
+ int64_t int_() override {
85
+ TORCH_CHECK(false);
86
+ }
87
+
88
+ std::string str() override {
89
+ if (coeff_ == 1) {
90
+ return "j" + std::to_string(val_);
91
+ }
92
+ return std::to_string(coeff_) + "*j" + std::to_string(val_);
93
+ }
94
+
95
+ // NOTE [ Inequalities with nested int ]
96
+ //
97
+ // The semantics of nested int when it comes to relations is that it is
98
+ // treated as integer known to be within a certain range,
99
+ //
100
+ // j0 \in [2, int64_t::max]
101
+ //
102
+ // allowing us to answer queries like j0 >= 1 (True), and j0 == 0 (False).
103
+ // This is a useful default range for the raggedness pattern of a jagged
104
+ // tensor (1) since sizes are non-negative, and (2) we need to get past 0/1
105
+ // specialization checks.
106
+ //
107
+ // [ Indeterminate inequalities error out ]
108
+ //
109
+ // Given the semantic defined above, certain relations like j0 < 3 are thus
110
+ // indeterminable. In our impl today, evaluating such relations error
111
+ //
112
+ // It may seem convenient to just define indeterminate relations to return
113
+ // False, but the implementation we maintain in parallel using sympy does not
114
+ // allow this.
115
+ //
116
+ // Sympy only allows overriding of Ge. The other relations (Lt, Gt, Le) are,
117
+ // by consequence, all derived from Ge e.g., Lt(a, b) := !Ge(a, b). This
118
+ // would mean that means that if we define the indeterminate j0 >= 3 to be
119
+ // False, the also indeterminate j0 < 3 will be evaluated to be True!
120
+ //
121
+ // [ Coefficient are assumed positive ]
122
+ //
123
+ // For the purpose of computing inequalities, we consider the coefficient of
124
+ // the nested int to be a positive integer.
125
+ //
126
+ // Thus, no modifications are needed to the logic since
127
+ // j0 >= k implies coeff * j0 >= k
128
+ //
129
+ c10::SymNode eq(const c10::SymNode& other) override;
130
+ c10::SymNode ne(const c10::SymNode& other) override;
131
+ c10::SymNode ge(const c10::SymNode& other) override;
132
+ c10::SymNode gt(const c10::SymNode& other) override;
133
+ c10::SymNode lt(const c10::SymNode& other) override;
134
+ c10::SymNode le(const c10::SymNode& other) override;
135
+ c10::SymNode mul(const c10::SymNode& other) override;
136
+
137
+ std::optional<int64_t> nested_int() override {
138
+ return val_;
139
+ }
140
+
141
+ std::optional<int64_t> nested_int_coeff() override {
142
+ return coeff_;
143
+ }
144
+
145
+ bool is_symbolic() override {
146
+ return false;
147
+ }
148
+
149
+ c10::SymNode clone() override;
150
+
151
+ #define DEFINE_BINARY_NOT_SUPPORTED(name) \
152
+ c10::SymNode name(const c10::SymNode& other) override { \
153
+ TORCH_CHECK(false, #name " not supported by NestedIntSymNode"); \
154
+ }
155
+
156
+ DEFINE_BINARY_NOT_SUPPORTED(add)
157
+ DEFINE_BINARY_NOT_SUPPORTED(sub)
158
+ DEFINE_BINARY_NOT_SUPPORTED(truediv)
159
+ DEFINE_BINARY_NOT_SUPPORTED(pow)
160
+ DEFINE_BINARY_NOT_SUPPORTED(floordiv)
161
+ DEFINE_BINARY_NOT_SUPPORTED(mod)
162
+ DEFINE_BINARY_NOT_SUPPORTED(sym_min)
163
+ DEFINE_BINARY_NOT_SUPPORTED(sym_max)
164
+ DEFINE_BINARY_NOT_SUPPORTED(sym_and)
165
+ DEFINE_BINARY_NOT_SUPPORTED(sym_or)
166
+
167
+ #undef DEFINE_BINARY_NOT_SUPPORTED
168
+
169
+ #define DEFINE_NOT_SUPPORTED(name) \
170
+ c10::SymNode name() override { \
171
+ TORCH_CHECK(false, #name " is not supported by NestedIntSymNode"); \
172
+ }
173
+
174
+ DEFINE_NOT_SUPPORTED(sym_not)
175
+ DEFINE_NOT_SUPPORTED(ceil)
176
+ DEFINE_NOT_SUPPORTED(floor)
177
+ DEFINE_NOT_SUPPORTED(neg)
178
+ DEFINE_NOT_SUPPORTED(sym_float)
179
+
180
+ #undef DEFINE_NOT_SUPPORTED
181
+
182
+ private:
183
+ int64_t val_;
184
+ int64_t coeff_;
185
+ };
186
+
187
+ } // namespace c10
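A rough sketch of the semantics above (illustrative; the header itself cautions that these nodes are normally created through higher-level APIs rather than directly):

    auto j0 = c10::make_intrusive<c10::NestedIntSymNodeImpl>(/*val=*/0, /*coeff=*/1);
    // j0->str() == "j0"; with coeff = 2 the node would print as "2*j0".
    c10::SymNode one = j0->wrap_int(1);
    c10::SymNode ge_one = j0->ge(one);  // determinate: j0 is assumed to lie in [2, int64_t::max]
    // j0->lt(j0->wrap_int(3)) would be indeterminate and, per the note above, errors out.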
phivenv/Lib/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h ADDED
@@ -0,0 +1,240 @@
1
+ #pragma once
2
+
3
+ // define constants like M_PI and C keywords for MSVC
4
+ #ifdef _MSC_VER
5
+ #define _USE_MATH_DEFINES
6
+ #include <math.h>
7
+ #endif
8
+
9
+
10
+ #ifdef __CUDACC__
11
+ #include <cuda.h>
12
+ #endif
13
+
14
+ #include <array>
15
+ #include <c10/macros/Macros.h>
16
+ #include <cmath>
17
+ #include <cstdint>
18
+
19
+ namespace at {
20
+
21
+ // typedefs for holding vector data
22
+ namespace detail {
23
+
24
+ typedef std::array<uint32_t, 4> UINT4;
25
+ typedef std::array<uint32_t, 2> UINT2;
26
+ typedef std::array<double, 2> DOUBLE2;
27
+ typedef std::array<float, 2> FLOAT2;
28
+
29
+ } // namespace detail
30
+
31
+ /**
32
+ * Note [Philox Engine implementation]
33
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
34
+ * Originally implemented in PyTorch's fusion compiler
35
+ * Refer to: http://www.thesalmons.org/john/random123/papers/random123sc11.pdf
36
+ * for details regarding the engine.
37
+ *
38
+ * Note that currently this implementation of the philox engine is not used
39
+ * anywhere except for tests in cpu_generator_test.cpp. However, this engine
40
+ * will replace curandStatePhilox4_32_10_t in the future.
41
+ *
42
+ * The philox engine takes a seed value, a subsequence
43
+ * for starting the generation and an offset for the subsequence.
44
+ * Think of this engine as an algorithm producing a huge array. We are
45
+ * parallelizing this array by partitioning the huge array and assigning
46
+ * a thread index to each partition. In other words, each seed value
47
+ * (there are 2^64 possible seed values) gives a sub array of size
48
+ * 2^128 (each element in that array is a 128 bit number). Reasoning
49
+ * behind the array being of size 2^128 is, there are 2^64 possible
50
+ * thread index value and there is an array of size 2^64 for each of
51
+ * those thread index. Hence 2^64 * 2^64 = 2^128 for each seed value.
52
+ *
53
+ * In short, this generator can produce 2^64 (seed values) * 2^128 (number
54
+ * of elements in an array given by a seed value) = 2^192 values.
55
+ *
56
+ * Arguments:
57
+ * seed: Seed values could be any number from 0 to 2^64-1.
58
+ * subsequence: Subsequence is just the cuda thread indexing with:
59
+ * - blockIdx.x * blockDim.x + threadIdx.x
60
+ * offset: The offset variable in PhiloxEngine decides how many 128-bit
61
+ * random numbers to skip (i.e. how many groups of 4, 32-bit numbers to skip)
62
+ * and hence really decides the total number of randoms that can be achieved
63
+ * for the given subsequence.
64
+ */
65
+
66
+ class philox_engine {
67
+ public:
68
+
69
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
70
+ C10_HOST_DEVICE inline explicit philox_engine(uint64_t seed = 67280421310721,
71
+ uint64_t subsequence = 0,
72
+ uint64_t offset = 0) {
73
+
74
+ reset_state(seed, subsequence);
75
+ incr_n(offset);
76
+ }
77
+
78
+ C10_HOST_DEVICE inline void reset_state(uint64_t seed = 67280421310721,
79
+ uint64_t subsequence = 0) {
80
+ key_[0] = static_cast<uint32_t>(seed);
81
+ key_[1] = static_cast<uint32_t>(seed >> 32);
82
+ counter_ = detail::UINT4{};
83
+ counter_[2] = static_cast<uint32_t>(subsequence);
84
+ counter_[3] = static_cast<uint32_t>(subsequence >> 32);
85
+ STATE = 0;
86
+ }
87
+
88
+ /**
89
+ * Set the offset field of Philox Generator to the desired offset.
90
+ */
91
+ C10_HOST_DEVICE inline void set_offset(uint64_t offset) {
92
+ counter_[0] = static_cast<uint32_t>(offset);
93
+ counter_[1] = static_cast<uint32_t>(offset >> 32);
94
+ }
95
+
96
+ /**
97
+ * Gets the current offset of the Philox Generator.
98
+ */
99
+ C10_HOST_DEVICE uint64_t get_offset() const {
100
+ uint64_t lo = static_cast<uint64_t>(counter_[0]);
101
+ uint64_t hi = static_cast<uint64_t>(counter_[1]) << 32;
102
+ return lo | hi;
103
+ }
104
+
105
+ /**
106
+ * Produces a unique 32-bit pseudo random number on every invocation. Bookkeeps state to avoid waste.
107
+ */
108
+ C10_HOST_DEVICE inline uint32_t operator()(int32_t n_rounds = 10) { // 10 here to preserve back-compat behavior
109
+ if(STATE == 0) {
110
+ detail::UINT4 counter = counter_;
111
+ detail::UINT2 key = key_;
112
+ output_ = rand(counter, key, n_rounds);
113
+ incr();
114
+ }
115
+ uint32_t ret = output_[static_cast<int>(STATE)];
116
+ STATE = (STATE + 1) & 3;
117
+ return ret;
118
+ }
119
+
120
+ inline float randn(uint32_t n_rounds) {
121
+ #ifdef __CUDA_ARCH__
122
+ AT_ASSERT(false, "Unsupported invocation of randn on CUDA");
123
+ #endif
124
+ if(STATE == 0) {
125
+ detail::UINT4 counter = counter_;
126
+ detail::UINT2 key = key_;
127
+ output_ = rand(counter, key, n_rounds);
128
+ incr();
129
+ }
130
+ // TODO(min-jean-cho) change to Polar method, a more efficient version of Box-Muller method
131
+ // TODO(voz) We use std:: below, and thus need a separate impl for CUDA.
132
+ float u1 = 1 - uint32_to_uniform_float(output_[0]); // uint32_to_uniform_float returns [0,1), we need (0,1] to avoid passing 0 to log.
133
+ float u2 = 1 - uint32_to_uniform_float(output_[1]);
134
+ return static_cast<float>(std::sqrt(-2.0 * std::log(u1)) * std::cos(2.0 * M_PI * u2));
135
+ }
136
+
137
+ /**
138
+ * Function that Skips N 128 bit numbers in a subsequence
139
+ */
140
+ C10_HOST_DEVICE inline void incr_n(uint64_t n) {
141
+ uint32_t nlo = static_cast<uint32_t>(n);
142
+ uint32_t nhi = static_cast<uint32_t>(n >> 32);
143
+ counter_[0] += nlo;
144
+ // if overflow in x has occurred, carry over to nhi
145
+ if (counter_[0] < nlo) {
146
+ nhi++;
147
+ // if overflow in nhi has occurred during carry over,
148
+ // propagate that overflow to y and exit to increment z
149
+ // otherwise return
150
+ counter_[1] += nhi;
151
+ if(nhi != 0) {
152
+ if (nhi <= counter_[1]) {
153
+ return;
154
+ }
155
+ }
156
+ } else {
157
+ // if overflow in y has occurred during addition,
158
+ // exit to increment z
159
+ // otherwise return
160
+ counter_[1] += nhi;
161
+ if (nhi <= counter_[1]) {
162
+ return;
163
+ }
164
+ }
165
+ if (++counter_[2])
166
+ return;
167
+ ++counter_[3];
168
+ }
169
+
170
+ /**
171
+ * Function that Skips one 128 bit number in a subsequence
172
+ */
173
+ C10_HOST_DEVICE inline void incr() {
174
+ if (++counter_[0])
175
+ return;
176
+ if (++counter_[1])
177
+ return;
178
+ if (++counter_[2]) {
179
+ return;
180
+ }
181
+ ++counter_[3];
182
+ }
183
+
184
+ private:
185
+ detail::UINT4 counter_;
186
+ detail::UINT4 output_;
187
+ detail::UINT2 key_;
188
+ uint32_t STATE;
189
+
190
+ C10_HOST_DEVICE inline uint32_t mulhilo32(uint32_t a, uint32_t b,
191
+ uint32_t *result_high) {
192
+ #ifdef __CUDA_ARCH__
193
+ *result_high = __umulhi(a, b);
194
+ return a*b;
195
+ #else
196
+ const uint64_t product = static_cast<uint64_t>(a) * b;
197
+ *result_high = static_cast<uint32_t>(product >> 32);
198
+ return static_cast<uint32_t>(product);
199
+ #endif
200
+ }
201
+
202
+ C10_HOST_DEVICE inline detail::UINT4 single_round(detail::UINT4 ctr, detail::UINT2 in_key) {
203
+ uint32_t hi0 = 0;
204
+ uint32_t hi1 = 0;
205
+ uint32_t lo0 = mulhilo32(kPhiloxSA, ctr[0], &hi0);
206
+ uint32_t lo1 = mulhilo32(kPhiloxSB, ctr[2], &hi1);
207
+ detail::UINT4 ret;
208
+ ret[0] = hi1 ^ ctr[1] ^ in_key[0];
209
+ ret[1] = lo1;
210
+ ret[2] = hi0 ^ ctr[3] ^ in_key[1];
211
+ ret[3] = lo0;
212
+ return ret;
213
+ }
214
+
215
+ C10_HOST_DEVICE constexpr float uint32_to_uniform_float(uint32_t value) {
216
+ // maximum value such that `MAX_INT * scale < 1.0` (with float rounding)
217
+ constexpr float scale = 4.6566127342e-10;
218
+ return static_cast<float>(value & 0x7FFFFFFF) * scale;
219
+ }
220
+
221
+
222
+
223
+ C10_HOST_DEVICE inline detail::UINT4 rand(detail::UINT4& counter, detail::UINT2& key, uint32_t n_rounds) {
224
+ for (uint32_t round = 0; round < (n_rounds - 1); round++) {
225
+ counter = single_round(counter, key);
226
+ key[0] += (kPhilox10A); key[1] += (kPhilox10B);
227
+ }
228
+ return single_round(counter, key);
229
+ }
230
+
231
+
232
+ static const uint32_t kPhilox10A = 0x9E3779B9;
233
+ static const uint32_t kPhilox10B = 0xBB67AE85;
234
+ static const uint32_t kPhiloxSA = 0xD2511F53;
235
+ static const uint32_t kPhiloxSB = 0xCD9E8D57;
236
+ };
237
+
238
+ typedef philox_engine Philox4_32;
239
+
240
+ } // namespace at
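
For orientation, a minimal host-side sketch of how this engine is typically driven; the seed and offsets below are arbitrary illustrations, not values PyTorch itself uses:

    #include <ATen/core/PhiloxRNGEngine.h>
    #include <cstdint>

    void philox_demo() {
      // One engine per (seed, subsequence); offset skips whole 4x32-bit blocks.
      at::Philox4_32 engine(/*seed=*/42, /*subsequence=*/0, /*offset=*/0);
      uint32_t a = engine(); // first draw, computed from the first 128-bit block
      uint32_t b = engine(); // served from the same block via STATE bookkeeping

      // An engine constructed with offset=1 starts one 128-bit block later,
      // so its first draw equals the fifth draw of `engine`.
      at::Philox4_32 skipped(/*seed=*/42, /*subsequence=*/0, /*offset=*/1);
      uint32_t c = skipped();
      (void)a; (void)b; (void)c;
    }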
phivenv/Lib/site-packages/torch/include/ATen/core/PythonFallbackKernel.h ADDED
@@ -0,0 +1,35 @@
+ #pragma once
+ #include <ATen/core/TorchDispatchUtils.h>
+
+
+ namespace at::impl {
+
+ struct TORCH_API RestorePythonTLSSnapshot {
+   RestorePythonTLSSnapshot();
+   RestorePythonTLSSnapshot(RestorePythonTLSSnapshot&& other) = delete;
+   RestorePythonTLSSnapshot(const RestorePythonTLSSnapshot&) = delete;
+   RestorePythonTLSSnapshot& operator=(const RestorePythonTLSSnapshot&) = delete;
+   RestorePythonTLSSnapshot& operator=(RestorePythonTLSSnapshot&&) = delete;
+   ~RestorePythonTLSSnapshot();
+
+  private:
+   c10::impl::LocalDispatchKeySet saved_;
+   c10::impl::ForceDispatchKeyGuard guard_;
+ };
+
+
+ // RAII guard to make working with the above TLS safer.
+ struct TORCH_API MaybeSetTLSOnEntryGuard {
+  public:
+   MaybeSetTLSOnEntryGuard();
+   MaybeSetTLSOnEntryGuard(MaybeSetTLSOnEntryGuard&& other) = delete;
+   MaybeSetTLSOnEntryGuard(const MaybeSetTLSOnEntryGuard&) = delete;
+   MaybeSetTLSOnEntryGuard& operator=(const MaybeSetTLSOnEntryGuard&) = delete;
+   MaybeSetTLSOnEntryGuard& operator=(MaybeSetTLSOnEntryGuard&&) = delete;
+   ~MaybeSetTLSOnEntryGuard();
+
+  private:
+   bool value_set_;
+ };
+
+ } // namespace at::impl
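
These guards are internal plumbing, but the intended scoping is plain RAII; a hedged sketch (the function name is hypothetical):

    void call_python_fallback_hypothetical() {
      // Set the TLS on entry if it isn't already set.
      at::impl::MaybeSetTLSOnEntryGuard guard;
      // ... dispatch into the Python fallback kernel here ...
    } // the guard's destructor undoes whatever it set, on scope exit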
phivenv/Lib/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h ADDED
@@ -0,0 +1,22 @@
+ #pragma once
+
+ #include <ATen/core/dispatch/Dispatcher.h>
+
+ // TODO: this can probably live in c10
+
+
+ namespace at::impl {
+
+ class TORCH_API PythonOpRegistrationTrampoline final {
+   static std::atomic<c10::impl::PyInterpreter*> interpreter_;
+
+  public:
+   // Returns true if you successfully registered yourself (that means
+   // you are in the hot seat for doing the operator registrations!)
+   static bool registerInterpreter(c10::impl::PyInterpreter*);
+
+   // Returns nullptr if no interpreter has been registered yet.
+   static c10::impl::PyInterpreter* getInterpreter();
+ };
+
+ } // namespace at::impl
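
The registration handshake is first-wins; a hedged sketch of the caller's side (the function name is hypothetical):

    void maybe_register_ops_hypothetical(c10::impl::PyInterpreter* interp) {
      // Only the first interpreter to register wins the race and should
      // go on to perform the actual operator registrations.
      if (at::impl::PythonOpRegistrationTrampoline::registerInterpreter(interp)) {
        // ... perform operator registrations here ...
      }
    }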
phivenv/Lib/site-packages/torch/include/ATen/core/QuantizerBase.h ADDED
@@ -0,0 +1,84 @@
+ #pragma once
+
+ #include <c10/core/ScalarType.h>
+ #include <c10/core/QScheme.h>
+ #include <c10/util/intrusive_ptr.h>
+
+ namespace at {
+
+ class Tensor;
+ struct QTensorImpl;
+ struct Quantizer;
+ using ConstQuantizerPtr = const c10::intrusive_ptr<Quantizer>&;
+ using QuantizerPtr = c10::intrusive_ptr<Quantizer>;
+
+ /**
+  * Quantizer is the class for storing all the information
+  * that's necessary to perform quantize and dequantize
+  * operations.
+  *
+  * We might have different types of quantization schemes and this is
+  * the base class for all quantizers.
+  *
+  * QTensorImpl will hold a pointer to Quantizer so that we can support
+  * different quantization schemes on Tensor.
+  *
+  * For example, the most common quantization scheme, Affine Quantization,
+  * requires scale and zero_point as parameters; we store scale and zero_point
+  * inside the instance and use them to quantize a float Tensor or
+  * dequantize a quantized Tensor.
+  *
+  * When you add a new type of leaf Quantizer class, please also
+  * make sure to add a corresponding QScheme enum, since
+  * they should have a one-to-one mapping.
+  *
+  * Note about intrusive_ptr:
+  * A quantized Tensor holds an intrusive_ptr to its Quantizer, and multiple
+  * Tensors can share the same Quantizer. Quantizer should be immutable.
+  */
+ struct TORCH_API Quantizer : public c10::intrusive_ptr_target {
+   // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
+   const ScalarType scalar_type_;
+   explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {}
+   ~Quantizer() override = default;
+
+   // Copied from torch/csrc/jit/ir/scope.h
+   QuantizerPtr intrusive_from_this() {
+     c10::raw::intrusive_ptr::incref(this); // we are creating a new pointer
+                                            // from a raw `this` pointer
+                                            // so we need to bump the refcount
+                                            // to account for this ownership
+     return c10::intrusive_ptr<Quantizer>::reclaim(this);
+   }
+
+   /**
+    * Each concrete Quantizer type should have a unique QScheme type.
+    */
+   virtual QScheme qscheme() const = 0;
+
+   ScalarType scalar_type() const {
+     return scalar_type_;
+   }
+
+   /**
+    * Quantize a float Tensor into a quantized Tensor.
+    */
+   virtual Tensor quantize(const Tensor& t) = 0;
+
+   /**
+    * Dequantize a quantized Tensor into a float Tensor.
+    */
+   virtual Tensor dequantize(const Tensor& t) = 0;
+
+   /**
+    * Dequantize a quantized Tensor into a float Tensor, out= variant.
+    */
+   virtual Tensor& dequantize_out(Tensor& out, const Tensor& t) = 0;
+
+   /**
+    * Compare against `other` for equality.
+    */
+   virtual bool equalTo(QuantizerPtr other) const = 0;
+ };
+
+ } // namespace at
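
The affine scheme referenced above maps a real value x to an integer q via q = round(x / scale) + zero_point, and back via x ≈ (q - zero_point) * scale. A standalone sketch of that arithmetic (illustrative only, not PyTorch's actual kernels):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Quantize a float to int8 with the given affine parameters.
    int8_t affine_quantize(float x, float scale, int32_t zero_point) {
      int32_t q = static_cast<int32_t>(std::nearbyint(x / scale)) + zero_point;
      return static_cast<int8_t>(std::clamp(q, -128, 127)); // saturate to the int8 range
    }

    // Recover an approximation of the original float.
    float affine_dequantize(int8_t q, float scale, int32_t zero_point) {
      return static_cast<float>(static_cast<int32_t>(q) - zero_point) * scale;
    }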
phivenv/Lib/site-packages/torch/include/ATen/core/Range.h ADDED
@@ -0,0 +1,25 @@
+ #pragma once
+
+ #include <cstdint>
+ #include <iosfwd>
+
+ namespace at {
+
+ struct Range {
+   Range(int64_t begin, int64_t end)
+     : begin(begin)
+     , end(end) {}
+
+   int64_t size() const { return end - begin; }
+
+   Range operator/(int64_t divisor) {
+     return Range(begin / divisor, end / divisor);
+   }
+
+   int64_t begin;
+   int64_t end;
+ };
+
+ std::ostream& operator<<(std::ostream& out, const Range& range);
+
+ } // namespace at
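
A small usage sketch (values arbitrary); note that operator/ divides both endpoints:

    #include <ATen/core/Range.h>

    void range_demo() {
      at::Range chunk(0, 128);
      int64_t n = chunk.size();     // 128
      at::Range halved = chunk / 2; // Range(0, 64)
      (void)n; (void)halved;
    }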
phivenv/Lib/site-packages/torch/include/ATen/core/Reduction.h ADDED
@@ -0,0 +1,14 @@
+ #pragma once
+
+ namespace at::Reduction {
+
+ // NB: Keep this in sync with Reduction class in torch/nn/_reduction.py
+ // These constants control the reduction behavior of loss functions.
+ // Ideally, this would be a scoped enum, but jit doesn't support that
+ enum Reduction {
+   None, // Do not reduce
+   Mean, // (Possibly weighted) mean of losses
+   Sum,  // Sum losses
+   END
+ };
+ } // namespace at::Reduction
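
To make the enum's contract concrete, a hedged sketch of how a loss routine might consume it (standalone, not PyTorch's implementation):

    #include <ATen/core/Reduction.h>
    #include <numeric>
    #include <vector>

    double reduce_losses(const std::vector<double>& losses,
                         at::Reduction::Reduction r) {
      double sum = std::accumulate(losses.begin(), losses.end(), 0.0);
      switch (r) {
        case at::Reduction::Sum:  return sum;
        case at::Reduction::Mean: return losses.empty() ? 0.0 : sum / losses.size();
        // Reduction::None would keep the per-element losses; this simplified
        // sketch falls back to the sum for any other value.
        default:                  return sum;
      }
    }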
phivenv/Lib/site-packages/torch/include/ATen/core/Scalar.h ADDED
@@ -0,0 +1 @@
+ #include <c10/core/Scalar.h>
phivenv/Lib/site-packages/torch/include/ATen/core/ScalarType.h ADDED
@@ -0,0 +1 @@
+ #include <c10/core/ScalarType.h>
phivenv/Lib/site-packages/torch/include/ATen/core/Tensor.h ADDED
@@ -0,0 +1,98 @@
+ #pragma once
+
+ #include <ATen/core/TensorBody.h>
+ #include <c10/util/Exception.h>
+
+ namespace at {
+ // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
+ class TORCH_API OptionalTensorRef {
+  public:
+   OptionalTensorRef() = default;
+
+   ~OptionalTensorRef() {
+     ref_.unsafeReleaseTensorImpl();
+   }
+
+   OptionalTensorRef(const TensorBase& src)
+     : ref_(Tensor::unsafe_borrow_t{}, src) {
+     TORCH_INTERNAL_ASSERT_DEBUG_ONLY(src.defined());
+   }
+
+   OptionalTensorRef(const OptionalTensorRef& rhs)
+     : ref_(Tensor::unsafe_borrow_t{}, rhs.ref_) {}
+
+   OptionalTensorRef(OptionalTensorRef&& rhs) = default;
+   OptionalTensorRef& operator=(OptionalTensorRef rhs) {
+     std::swap(ref_, rhs.ref_);
+     return *this;
+   }
+
+   bool has_value() const {
+     return ref_.defined();
+   }
+
+   const Tensor& getTensorRef() const & {
+     return ref_;
+   }
+
+   const Tensor& operator*() const & {
+     return ref_;
+   }
+
+   const Tensor* operator->() const & {
+     return &ref_;
+   }
+
+   operator bool() const {
+     return ref_.defined();
+   }
+
+  private:
+   Tensor ref_;
+ };
+
+ // Use to convert a TensorBase (that may be undefined) to an at::Tensor
+ // without bumping refcount.
+ class TORCH_API TensorRef {
+  public:
+   ~TensorRef() {
+     ref_.unsafeReleaseTensorImpl();
+   }
+
+   TensorRef(const TensorBase& src)
+     : ref_(Tensor::unsafe_borrow_t{}, src) {}
+   TensorRef(TensorRef&& other) = default;
+   TensorRef(const TensorRef&) = default;
+   TensorRef& operator=(const TensorRef&) = default;
+   TensorRef& operator=(TensorRef&&) = default;
+
+   const Tensor& operator*() const & {
+     return ref_;
+   }
+  private:
+   Tensor ref_;
+ };
+
+ template <typename T>
+ auto Tensor::register_hook(T&& hook) const -> Tensor::hook_return_void_t<T> {
+   // For hooks with a void return type, return an empty Tensor so the wrapped
+   // std::function still has a Tensor return type.
+   static_assert(std::is_same_v<decltype(hook(Tensor())), void>,
+                 "Expected hook to return void");
+   return _register_hook([fn=std::forward<T>(hook)](const TensorBase& grad_base) {
+     TensorRef grad(grad_base);
+     fn(*grad);
+     return Tensor();
+   });
+ }
+
+ template <typename T>
+ auto Tensor::register_hook(T&& hook) const -> Tensor::hook_return_var_t<T> {
+   return _register_hook([fn=std::forward<T>(hook)](const TensorBase& grad_base) {
+     TensorRef grad(grad_base);
+     Tensor ret = fn(*grad);
+     return TensorBase(std::move(ret));
+   });
+ }
+
+ } // namespace at
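
A hedged sketch of both hook flavors from the caller's side (assumes a tensor that participates in autograd; names illustrative):

    #include <ATen/core/Tensor.h>

    void hook_demo(const at::Tensor& t) {
      // Void-returning hook: observe the gradient without replacing it.
      t.register_hook([](const at::Tensor& grad) {
        // e.g. inspect or log grad here
      });

      // Tensor-returning hook: the returned tensor replaces the gradient.
      t.register_hook([](at::Tensor grad) { return grad * 2; });
    }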
phivenv/Lib/site-packages/torch/include/ATen/core/TensorAccessor.h ADDED
@@ -0,0 +1,275 @@
+ #pragma once
+
+ #include <c10/macros/Macros.h>
+ #include <c10/util/ArrayRef.h>
+ #include <c10/util/Deprecated.h>
+ #include <c10/util/Exception.h>
+ #include <c10/util/irange.h>
+ #include <cstddef>
+ #include <cstdint>
+ #include <type_traits>
+
+ namespace at {
+
+ // The PtrTraits argument to the TensorAccessor/GenericPackedTensorAccessor
+ // is used to enable the __restrict__ keyword/modifier for the data
+ // passed to cuda.
+ template <typename T>
+ struct DefaultPtrTraits {
+   typedef T* PtrType;
+ };
+
+ #if defined(__CUDACC__) || defined(__HIPCC__)
+ template <typename T>
+ struct RestrictPtrTraits {
+   typedef T* __restrict__ PtrType;
+ };
+ #endif
+
+ // TensorAccessorBase and TensorAccessor are used for both CPU and CUDA tensors.
+ // For CUDA tensors they are used in device code (only). This means that we restrict ourselves
+ // to functions and types available there (e.g. IntArrayRef isn't).
+
+ // The PtrTraits argument is only relevant to cuda to support `__restrict__` pointers.
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
+ class TensorAccessorBase {
+ public:
+   typedef typename PtrTraits<T>::PtrType PtrType;
+
+   C10_HOST_DEVICE TensorAccessorBase(
+       PtrType data_,
+       const index_t* sizes_,
+       const index_t* strides_)
+     : data_(data_), sizes_(sizes_), strides_(strides_) {}
+   C10_HOST IntArrayRef sizes() const {
+     return IntArrayRef(sizes_, N);
+   }
+   C10_HOST IntArrayRef strides() const {
+     return IntArrayRef(strides_, N);
+   }
+   C10_HOST_DEVICE index_t stride(index_t i) const {
+     return strides_[i];
+   }
+   C10_HOST_DEVICE index_t size(index_t i) const {
+     return sizes_[i];
+   }
+   C10_HOST_DEVICE PtrType data() {
+     return data_;
+   }
+   C10_HOST_DEVICE const PtrType data() const {
+     return data_;
+   }
+ protected:
+   PtrType data_;
+   const index_t* sizes_;
+   const index_t* strides_;
+ };
+
+ // The `TensorAccessor` is typically instantiated for CPU `Tensor`s using
+ // `Tensor.accessor<T, N>()`.
+ // For CUDA `Tensor`s, `GenericPackedTensorAccessor` is used on the host and only
+ // indexing on the device uses `TensorAccessor`s.
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
+ class TensorAccessor : public TensorAccessorBase<T,N,PtrTraits,index_t> {
+ public:
+   typedef typename PtrTraits<T>::PtrType PtrType;
+
+   C10_HOST_DEVICE TensorAccessor(
+       PtrType data_,
+       const index_t* sizes_,
+       const index_t* strides_)
+     : TensorAccessorBase<T, N, PtrTraits, index_t>(data_, sizes_, strides_) {}
+
+   C10_HOST_DEVICE TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i) {
+     return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, this->sizes_+1, this->strides_+1);
+   }
+
+   C10_HOST_DEVICE const TensorAccessor<T, N-1, PtrTraits, index_t> operator[](index_t i) const {
+     return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, this->sizes_+1, this->strides_+1);
+   }
+ };
+
+ template<typename T, template <typename U> class PtrTraits, typename index_t>
+ class TensorAccessor<T,1,PtrTraits,index_t> : public TensorAccessorBase<T,1,PtrTraits,index_t> {
+ public:
+   typedef typename PtrTraits<T>::PtrType PtrType;
+
+   C10_HOST_DEVICE TensorAccessor(
+       PtrType data_,
+       const index_t* sizes_,
+       const index_t* strides_)
+     : TensorAccessorBase<T, 1, PtrTraits, index_t>(data_, sizes_, strides_) {}
+   C10_HOST_DEVICE T & operator[](index_t i) {
+     // NOLINTNEXTLINE(clang-analyzer-core.NullDereference)
+     return this->data_[this->strides_[0]*i];
+   }
+   C10_HOST_DEVICE const T & operator[](index_t i) const {
+     return this->data_[this->strides_[0]*i];
+   }
+ };
+
+
+ // GenericPackedTensorAccessorBase and GenericPackedTensorAccessor are used for
+ // CUDA `Tensor`s on the host.
+ // In contrast to `TensorAccessor`s, they copy the strides and sizes on instantiation (on the host)
+ // in order to transfer them to the device when calling kernels.
+ // On the device, indexing a multidimensional tensor yields `TensorAccessor`s of lower dimension.
+ // Use RestrictPtrTraits as PtrTraits if you want the tensor's data pointer to be marked as __restrict__.
+ // Instantiation from data, sizes, strides is only needed on the host and std::copy isn't available
+ // on the device, so those functions are host only.
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
+ class GenericPackedTensorAccessorBase {
+ public:
+   typedef typename PtrTraits<T>::PtrType PtrType;
+   C10_HOST GenericPackedTensorAccessorBase(
+       PtrType data_,
+       const index_t* sizes_,
+       const index_t* strides_)
+     : data_(data_) {
+     std::copy(sizes_, sizes_ + N, std::begin(this->sizes_));
+     std::copy(strides_, strides_ + N, std::begin(this->strides_));
+   }
+
+   // if index_t is not int64_t, we want to have an int64_t constructor
+   template <typename source_index_t, class = std::enable_if_t<std::is_same_v<source_index_t, int64_t>>>
+   C10_HOST GenericPackedTensorAccessorBase(
+       PtrType data_,
+       const source_index_t* sizes_,
+       const source_index_t* strides_)
+     : data_(data_) {
+     for (const auto i : c10::irange(N)) {
+       this->sizes_[i] = sizes_[i];
+       this->strides_[i] = strides_[i];
+     }
+   }
+
+   C10_HOST_DEVICE index_t stride(index_t i) const {
+     return strides_[i];
+   }
+   C10_HOST_DEVICE index_t size(index_t i) const {
+     return sizes_[i];
+   }
+   C10_HOST_DEVICE PtrType data() {
+     return data_;
+   }
+   C10_HOST_DEVICE const PtrType data() const {
+     return data_;
+   }
+ protected:
+   PtrType data_;
+   // NOLINTNEXTLINE(*c-arrays*)
+   index_t sizes_[N];
+   // NOLINTNEXTLINE(*c-arrays*)
+   index_t strides_[N];
+   C10_HOST void bounds_check_(index_t i) const {
+     TORCH_CHECK_INDEX(
+         0 <= i && i < index_t{N},
+         "Index ",
+         i,
+         " is not within bounds of a tensor of dimension ",
+         N);
+   }
+ };
+
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
+ class GenericPackedTensorAccessor : public GenericPackedTensorAccessorBase<T,N,PtrTraits,index_t> {
+ public:
+   typedef typename PtrTraits<T>::PtrType PtrType;
+
+   C10_HOST GenericPackedTensorAccessor(
+       PtrType data_,
+       const index_t* sizes_,
+       const index_t* strides_)
+     : GenericPackedTensorAccessorBase<T, N, PtrTraits, index_t>(data_, sizes_, strides_) {}
+
+   // if index_t is not int64_t, we want to have an int64_t constructor
+   template <typename source_index_t, class = std::enable_if_t<std::is_same_v<source_index_t, int64_t>>>
+   C10_HOST GenericPackedTensorAccessor(
+       PtrType data_,
+       const source_index_t* sizes_,
+       const source_index_t* strides_)
+     : GenericPackedTensorAccessorBase<T, N, PtrTraits, index_t>(data_, sizes_, strides_) {}
+
+   C10_DEVICE TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i) {
+     index_t* new_sizes = this->sizes_ + 1;
+     index_t* new_strides = this->strides_ + 1;
+     return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, new_sizes, new_strides);
+   }
+
+   C10_DEVICE const TensorAccessor<T, N - 1, PtrTraits, index_t> operator[](index_t i) const {
+     const index_t* new_sizes = this->sizes_ + 1;
+     const index_t* new_strides = this->strides_ + 1;
+     return TensorAccessor<T,N-1,PtrTraits,index_t>(this->data_ + this->strides_[0]*i, new_sizes, new_strides);
+   }
+
+   /// Returns a PackedTensorAccessor of the same dimension after transposing the
+   /// two dimensions given. Does not actually move elements; transposition is
+   /// made by permuting the size/stride arrays. If the dimensions are not valid,
+   /// asserts.
+   C10_HOST GenericPackedTensorAccessor<T, N, PtrTraits, index_t> transpose(
+       index_t dim1,
+       index_t dim2) const {
+     this->bounds_check_(dim1);
+     this->bounds_check_(dim2);
+     GenericPackedTensorAccessor<T, N, PtrTraits, index_t> result(
+         this->data_, this->sizes_, this->strides_);
+     std::swap(result.strides_[dim1], result.strides_[dim2]);
+     std::swap(result.sizes_[dim1], result.sizes_[dim2]);
+     return result;
+   }
+ };
+
+ template<typename T, template <typename U> class PtrTraits, typename index_t>
+ class GenericPackedTensorAccessor<T,1,PtrTraits,index_t> : public GenericPackedTensorAccessorBase<T,1,PtrTraits,index_t> {
+ public:
+   typedef typename PtrTraits<T>::PtrType PtrType;
+   C10_HOST GenericPackedTensorAccessor(
+       PtrType data_,
+       const index_t* sizes_,
+       const index_t* strides_)
+     : GenericPackedTensorAccessorBase<T, 1, PtrTraits, index_t>(data_, sizes_, strides_) {}
+
+   // if index_t is not int64_t, we want to have an int64_t constructor
+   template <typename source_index_t, class = std::enable_if_t<std::is_same_v<source_index_t, int64_t>>>
+   C10_HOST GenericPackedTensorAccessor(
+       PtrType data_,
+       const source_index_t* sizes_,
+       const source_index_t* strides_)
+     : GenericPackedTensorAccessorBase<T, 1, PtrTraits, index_t>(data_, sizes_, strides_) {}
+
+   C10_DEVICE T & operator[](index_t i) {
+     return this->data_[this->strides_[0] * i];
+   }
+   C10_DEVICE const T& operator[](index_t i) const {
+     return this->data_[this->strides_[0]*i];
+   }
+
+   // Same as in the general N-dimensional case, but note that in the
+   // 1-dimensional case the returned PackedTensorAccessor will always be an
+   // identical copy of the original
+   C10_HOST GenericPackedTensorAccessor<T, 1, PtrTraits, index_t> transpose(
+       index_t dim1,
+       index_t dim2) const {
+     this->bounds_check_(dim1);
+     this->bounds_check_(dim2);
+     return GenericPackedTensorAccessor<T, 1, PtrTraits, index_t>(
+         this->data_, this->sizes_, this->strides_);
+   }
+ };
+
+
+ // Can't put this directly into the macro function args because of commas
+ #define AT_X GenericPackedTensorAccessor<T, N, PtrTraits, index_t>
+
+ // Old name for `GenericPackedTensorAccessor`
+ template <typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
+ C10_DEFINE_DEPRECATED_USING(PackedTensorAccessor, AT_X)
+
+ #undef AT_X
+
+ template <typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits>
+ using PackedTensorAccessor32 = GenericPackedTensorAccessor<T, N, PtrTraits, int32_t>;
+
+ template <typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits>
+ using PackedTensorAccessor64 = GenericPackedTensorAccessor<T, N, PtrTraits, int64_t>;
+ } // namespace at
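
A hedged sketch of the CPU accessor path described above (assumes a 2-D float CPU tensor; kernel details elided):

    #include <ATen/core/Tensor.h>

    void accessor_demo(at::Tensor& t) {
      auto acc = t.accessor<float, 2>();  // checks dim() == 2 at runtime
      for (int64_t i = 0; i < acc.size(0); ++i) {
        for (int64_t j = 0; j < acc.size(1); ++j) {
          acc[i][j] += 1.0f;  // strided element access, no dispatch overhead
        }
      }
    }

    // For CUDA, one would instead build a packed accessor on the host and pass
    // it to a kernel by value, e.g. (my_kernel is hypothetical):
    //   auto packed = t.packed_accessor32<float, 2, at::RestrictPtrTraits>();
    //   my_kernel<<<blocks, threads>>>(packed);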
phivenv/Lib/site-packages/torch/include/ATen/core/TensorBase.h ADDED
@@ -0,0 +1,1056 @@
+ #pragma once
+
+ #include <c10/core/Device.h>
+ #include <c10/core/Layout.h>
+ #include <c10/core/MemoryFormat.h>
+ #include <c10/core/ScalarType.h>
+ #include <c10/core/ScalarTypeToTypeMeta.h>
+ #include <c10/core/Storage.h>
+ #include <c10/core/SymIntArrayRef.h>
+ #include <c10/core/TensorImpl.h>
+ #include <c10/core/TensorOptions.h>
+ #include <c10/core/UndefinedTensorImpl.h>
+ #include <c10/core/WrapDimMinimal.h>
+ #include <c10/util/C++17.h>
+ #include <c10/util/Exception.h>
+ #include <c10/util/ExclusivelyOwned.h>
+ #include <c10/util/ExclusivelyOwnedTensorTraits.h>
+ #include <c10/util/MaybeOwned.h>
+ #include <optional>
+ #include <c10/util/intrusive_ptr.h>
+
+ #include <ATen/core/NamedTensor.h>
+ #include <ATen/core/QuantizerBase.h>
+ #include <ATen/core/TensorAccessor.h>
+ #include <ATen/StorageUtils.h>
+
+ namespace c10 {
+ class Scalar;
+ }
+
+ namespace torch::autograd {
+
+ struct Node;
+
+ } // namespace torch::autograd
+
+ namespace at {
+
+ class Tensor;
+ class TensorBase;
+
+ // Convert Tensor to TensorBase without any need to include Tensor.h
+ TORCH_API const TensorBase& get_tensor_base(const Tensor& t);
+
+ namespace impl {
+ inline bool variable_excluded_from_dispatch() {
+ #ifdef C10_MOBILE
+   // Please read the comment in `VariableFallbackKernel.cpp` about the background of this change.
+   return true;
+ #else
+   return c10::impl::tls_local_dispatch_key_set().excluded_.isSupersetOf(c10::autograd_dispatch_keyset);
+ #endif
+ }
+
+ }
+
+ // NOTE: [Tensor vs. TensorBase]
+ //
+ // Tensor, being the central data structure in PyTorch, gets used and
+ // its header included almost everywhere. Unfortunately this means
+ // every time an operator signature is updated or changed in
+ // native_functions.yaml, you (and every other PyTorch developer) need
+ // to recompile all of ATen and its dependencies.
+ //
+ // TensorBase aims to break up these header dependencies, and improve
+ // incremental build times for all PyTorch developers. TensorBase
+ // represents a reference counted handle to TensorImpl, exactly the
+ // same as Tensor. However, TensorBase doesn't have code generated
+ // methods in its API and thus no dependence on native_functions.yaml.
+ //
+ // Usage tips
+ // ----------
+ // - You can `#define TORCH_ASSERT_NO_OPERATORS` at the top of a .cpp
+ //   or .cu file to ensure it has no header dependencies on
+ //   native_functions.yaml (direct or indirect).
+ // - Tensor inherits from TensorBase, so functions taking
+ //   `const TensorBase &` are callable with Tensor as well.
+ // - TensorBase can be converted to Tensor with `Tensor(tensor_base)`,
+ //   but this requires a reference-count bump. OptionalTensorRef, on
+ //   the other hand, can materialize a `const Tensor &` without
+ //   touching the reference-count.
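
A hedged sketch illustrating these usage tips (the helper below is illustrative, not part of ATen):

    #define TORCH_ASSERT_NO_OPERATORS  // keep this TU free of generated-op headers
    #include <ATen/core/TensorBase.h>

    // Callable with at::Tensor too, since Tensor inherits from TensorBase.
    int64_t count_elements(const at::TensorBase& t) {
      return t.defined() ? t.numel() : 0;
    }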
82
+ class TORCH_API TensorBase {
83
+ public:
84
+ struct unsafe_borrow_t { explicit unsafe_borrow_t() = default; };
85
+
86
+ protected:
87
+ // Create a Tensor with a +0 reference count. Special care must be
88
+ // taken to avoid decrementing this reference count at destruction
89
+ // time. Intended to support MaybeOwnedTraits<Tensor>.
90
+ explicit TensorBase(unsafe_borrow_t, const TensorBase& rhs)
91
+ : impl_(c10::intrusive_ptr<at::TensorImpl, UndefinedTensorImpl>(rhs.impl_.get(), c10::raw::DontIncreaseRefcount{})) {}
92
+ friend MaybeOwnedTraits<TensorBase>;
93
+
94
+ public:
95
+ TensorBase() = default;
96
+ // This constructor should not be used by end users and is an implementation
97
+ // detail invoked by autogenerated code.
98
+ explicit TensorBase(
99
+ c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
100
+ : impl_(std::move(tensor_impl)) {
101
+ if (impl_.get() == nullptr) {
102
+ throw std::runtime_error("TensorImpl with nullptr is not supported");
103
+ }
104
+ }
105
+ TensorBase(const TensorBase&) = default;
106
+ TensorBase(TensorBase&&) noexcept = default;
107
+ ~TensorBase() noexcept = default;
108
+
109
+ public:
110
+ // Creates a new wrapper from TensorImpl. Intentionally a free method because
111
+ // it should be used with care. Checks necessary invariants
112
+ static TensorBase wrap_tensor_impl(
113
+ c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl) {
114
+ TensorBase r(std::move(tensor_impl));
115
+ r.enforce_invariants();
116
+ return r;
117
+ }
118
+
119
+ int64_t dim() const {
120
+ return impl_->dim();
121
+ }
122
+ int64_t storage_offset() const {
123
+ return impl_->storage_offset();
124
+ }
125
+
126
+ TensorBase contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const {
127
+ if (is_contiguous(memory_format)) {
128
+ return *this;
129
+ } else {
130
+ return __dispatch_contiguous(memory_format);
131
+ }
132
+ }
133
+
134
+ /// Should be used if *this can reasonably be expected to be contiguous and
135
+ /// performance is important.
136
+ /// Compared to contiguous, it saves a reference count
137
+ /// increment/decrement if *this is already contiguous, at the cost
138
+ /// in all cases of an extra pointer of stack usage, an extra branch
139
+ /// to access, and an extra branch at destruction time.
140
+ c10::MaybeOwned<TensorBase> expect_contiguous(
141
+ MemoryFormat memory_format=MemoryFormat::Contiguous) const &;
142
+
143
+ // Use .contiguous() instead. Trying to borrow from a prvalue
144
+ // will only lead to trouble and dangling references.
145
+ c10::MaybeOwned<TensorBase> expect_contiguous(
146
+ MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete;
147
+
148
+ const TensorBase& fill_(const c10::Scalar& scalar) const;
149
+ const TensorBase& zero_() const;
150
+
151
+ TensorBase to(at::TensorOptions options={}, bool non_blocking=false, bool copy=false, std::optional<at::MemoryFormat> memory_format=std::nullopt) const;
152
+
153
+ bool is_complex() const {
154
+ return at::isComplexType(this->scalar_type());
155
+ }
156
+
157
+ bool is_floating_point() const {
158
+ return at::isFloatingType(this->scalar_type());
159
+ }
160
+
161
+ bool is_signed() const {
162
+ return at::isSignedType(this->scalar_type());
163
+ }
164
+
165
+ c10::SymInt sym_size(int64_t dim) const {
166
+ return impl_->sym_size(dim);
167
+ }
168
+
169
+ c10::SymInt sym_stride(int64_t dim) const {
170
+ const auto sizes = this->sym_strides();
171
+ const auto ndim = static_cast<int64_t>(sizes.size());
172
+ // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping)
173
+ return sizes[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)];
174
+
175
+ }
176
+
177
+ int64_t size(int64_t dim) const {
178
+ return impl_->size(dim);
179
+ }
180
+
181
+ int64_t stride(int64_t dim) const {
182
+ const auto strides = this->strides();
183
+ const auto ndim = static_cast<int64_t>(strides.size());
184
+ // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping)
185
+ return strides[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)];
186
+ }
187
+
188
+ TensorImpl * unsafeGetTensorImpl() const {
189
+ return impl_.get();
190
+ }
191
+ TensorImpl * unsafeReleaseTensorImpl() {
192
+ return impl_.release();
193
+ }
194
+ const c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl>& getIntrusivePtr() const {
195
+ return impl_;
196
+ }
197
+
198
+ c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> unsafeReleaseIntrusivePtr() {
199
+ return std::move(impl_);
200
+ }
201
+
202
+ bool defined() const {
203
+ return impl_;
204
+ }
205
+
206
+ void reset() {
207
+ impl_.reset();
208
+ }
209
+
210
+ #if defined (_MSC_VER)
211
+ TensorBase& operator=(const TensorBase& x) & {
212
+ impl_ = x.impl_;
213
+ return *this;
214
+ };
215
+ TensorBase& operator=(TensorBase&& x) & noexcept {
216
+ impl_ = std::move(x.impl_);
217
+ return *this;
218
+ }
219
+ #else
220
+ TensorBase& operator=(const TensorBase& x) & = default;
221
+ TensorBase& operator=(TensorBase&& x) & noexcept = default;
222
+ #endif
223
+
224
+ // Ban assignment to rvalues, since at::Tensor (weirdly) performs a deep copy here
225
+ TensorBase& operator=(const TensorBase&) && = delete;
226
+ TensorBase& operator=(TensorBase&&) && noexcept = delete;
227
+
228
+ bool is_same(const TensorBase& other) const noexcept {
229
+ return impl_ == other.impl_;
230
+ }
231
+ size_t use_count() const noexcept {
232
+ return impl_.use_count();
233
+ }
234
+ size_t weak_use_count() const noexcept {
235
+ return impl_.weak_use_count();
236
+ }
237
+
238
+ std::string toString() const;
239
+
240
+ IntArrayRef sizes() const {
241
+ return impl_->sizes();
242
+ }
243
+ c10::SymIntArrayRef sym_sizes() const {
244
+ return impl_->sym_sizes();
245
+ }
246
+ c10::SymIntArrayRef sym_strides() const {
247
+ return impl_->sym_strides();
248
+ }
249
+ IntArrayRef strides() const {
250
+ return impl_->strides();
251
+ }
252
+ // See impl::get_opt_names in ATen/NamedTensor.h for docs.
253
+ std::optional<DimnameList> opt_names() const {
254
+ return impl::get_opt_names(unsafeGetTensorImpl());
255
+ }
256
+ // See impl::get_names in ATen/NamedTensor.h for docs.
257
+ DimnameList names() const {
258
+ return impl::get_names(unsafeGetTensorImpl());
259
+ }
260
+ int64_t ndimension() const {
261
+ return dim();
262
+ }
263
+
264
+ bool is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const {
265
+ return impl_->is_contiguous(memory_format);
266
+ }
267
+
268
+ bool is_non_overlapping_and_dense() const {
269
+ return impl_->is_non_overlapping_and_dense();
270
+ }
271
+
272
+ at::MemoryFormat suggest_memory_format(
273
+ bool channels_last_strides_exact_match = false) const {
274
+ // Setting channels_last_strides_exact_match to true forces function to
275
+ // check 0,1 - sized dimension strides.
276
+ if (layout() == at::kStrided) {
277
+ if (impl_->is_strides_like_channels_last()) {
278
+ if (!channels_last_strides_exact_match ||
279
+ get_channels_last_strides_2d(sizes()) == strides()) {
280
+ return at::MemoryFormat::ChannelsLast;
281
+ }
282
+ }
283
+ else if (impl_->is_strides_like_channels_last_3d()) {
284
+ if (!channels_last_strides_exact_match ||
285
+ get_channels_last_strides_3d(sizes()) == strides()) {
286
+ return at::MemoryFormat::ChannelsLast3d;
287
+ }
288
+ }
289
+ }
290
+ return at::MemoryFormat::Contiguous;
291
+ }
292
+
293
+ // Total bytes consumed by the "view" of elements of the array. Does not
294
+ // include size of metadata. The number reported here does not necessarily
295
+ // correspond to the true physical memory consumed by a tensor; instead,
296
+ // it reports the memory the tensor would take *if* it were contiguous.
297
+ // Defined to be numel() * itemsize()
298
+ size_t nbytes() const {
299
+ TORCH_CHECK(layout () != at::kSparse,
300
+ "nbytes is not defined for sparse tensors. If you want the size of the constituent " \
301
+ "tensors, add the nbytes of the indices and values. If you want the size of the " \
302
+ "equivalent dense tensor, multiply numel() by element_size()");
303
+ return impl_->numel() * impl_->itemsize();
304
+ }
305
+
306
+ c10::SymInt sym_nbytes() const {
307
+ TORCH_CHECK(layout () != at::kSparse,
308
+ "nbytes is not defined for sparse tensors. If you want the size of the constituent " \
309
+ "tensors, add the nbytes of the indices and values. If you want the size of the " \
310
+ "equivalent dense tensor, multiply numel() by element_size()");
311
+ return impl_->sym_numel() * impl_->itemsize();
312
+ }
313
+
314
+ int64_t numel() const {
315
+ return impl_->numel();
316
+ }
317
+
318
+ c10::SymInt sym_numel() const {
319
+ return impl_->sym_numel();
320
+ }
321
+
322
+ c10::SymInt sym_storage_offset() const {
323
+ return impl_->sym_storage_offset();
324
+ }
325
+
326
+ // Length of one array element in bytes. This is the traditional
327
+ // Numpy naming.
328
+ size_t itemsize() const {
329
+ return impl_->itemsize();
330
+ }
331
+
332
+ // Same as itemsize(). This is the PyTorch naming.
333
+ int64_t element_size() const {
334
+ return static_cast<int64_t>(impl_->itemsize());
335
+ }
336
+
337
+ DispatchKeySet key_set() const {
338
+ return impl_->key_set();
339
+ }
340
+ ScalarType scalar_type() const {
341
+ return typeMetaToScalarType(impl_->dtype());
342
+ }
343
+ bool has_storage() const {
344
+ return defined() && impl_->has_storage();
345
+ }
346
+ const Storage& storage() const {
347
+ return impl_->storage();
348
+ }
349
+ bool is_alias_of(const at::TensorBase& other) const{
350
+ return impl_->storage().is_alias_of(other.storage());
351
+ }
352
+
353
+ // Move the storage backend to shm based
354
+ // to enable memory sharing across processes.
355
+ //
356
+ // NB1: the ideal behavior of this API still requires further discussion
357
+ // but for now we are inclined to keep it consistent with existing THP behavior
358
+ // https://github.com/pytorch/pytorch/blob/4dca9bde0552afc67b5b74f4a0696fe6055709c4/torch/storage.py#L196-L212
359
+ // so we don't assert on anything here and rely on caller knowing
360
+ // what it's doing.
361
+ //
362
+ // NB2: this currently provides Linux fd based shm support only
363
+ // to simplify the storage lifetime management logic in ATen
364
+ // and similarly for now we are not adding support for file system based
365
+ // shm support like in THP due to additional GC manager support needed
366
+ // to prevent leaks.
367
+ // As such, calling this from non supported systems (e.g. Windows) would fail.
368
+ void share_memory_() {
369
+ at::share_memory_(*this);
370
+ }
371
+
372
+ inline bool _is_zerotensor() const {
373
+ return impl_->_is_zerotensor();
374
+ }
375
+
376
+ inline void _set_zero(bool zero) const {
377
+ impl_->_set_zero(zero);
378
+ }
379
+
380
+ inline bool is_conj() const {
381
+ return impl_->is_conj();
382
+ }
383
+
384
+ // sets the conjugate bit of a tensor.
385
+ // NOTE: Conjugate bit is supposed to be a read-only field. Only change this, if you are sure
386
+ // that's what you want. Changing this might lead to incorrect behavior since conjugation is
387
+ // a lazy operation and we rely on this bit to determine if a conjugation needs to be materialized.
388
+ inline void _set_conj(bool conjugate) const {
389
+ impl_->_set_conj(conjugate);
390
+ }
391
+
392
+ inline bool is_neg() const {
393
+ return impl_->is_neg();
394
+ }
395
+
396
+ // sets the negative bit of a tensor.
397
+ // NOTE: Negative bit is supposed to be a read-only field. Only change this, if you are sure
398
+ // that's what you want. Changing this might lead to incorrect behavior since we rely on this
399
+ // bit to determine if a negation needs to be materialized.
400
+ inline void _set_neg(bool negative) const {
401
+ impl_->_set_neg(negative);
402
+ }
403
+
404
+ /// Returns a `Tensor`'s layout.
405
+ Layout layout() const {
406
+ return impl_->layout();
407
+ }
408
+
409
+ /// Returns a `Tensor`'s dtype (`TypeMeta`).
410
+ caffe2::TypeMeta dtype() const {
411
+ return impl_->dtype();
412
+ }
413
+
414
+ /// Returns a `Tensor`'s device.
415
+ inline Device device() const {
416
+ return impl_->device();
417
+ }
418
+
419
+ /// Returns a `Tensor`'s device index.
420
+ DeviceIndex get_device() const {
421
+ // NB: this is not a native function to avoid dispatching overhead.
422
+ return impl_->get_device();
423
+ }
424
+
425
+ /// Returns if a `Tensor` has CPU backend.
426
+ bool is_cpu() const {
427
+ // NB: this is not a native function to avoid dispatching overhead.
428
+ return impl_->is_cpu();
429
+ }
430
+
431
+ /// Returns if a `Tensor` has CUDA backend.
432
+ bool is_cuda() const {
433
+ // NB: this is not a native function to avoid dispatching overhead.
434
+ return impl_->is_cuda();
435
+ }
436
+
437
+ /// Returns if a `Tensor` has IPU backend.
438
+ bool is_ipu() const {
439
+ // NB: this is not a native function to avoid dispatching overhead.
440
+ return impl_->is_ipu();
441
+ }
442
+
443
+ /// Returns if a `Tensor` has XPU backend.
444
+ bool is_xpu() const {
445
+ // NB: this is not a native function to avoid dispatching overhead.
446
+ return impl_->is_xpu();
447
+ }
448
+
449
+ /// Returns if a `Tensor` has XLA backend.
450
+ bool is_xla() const {
451
+ return impl_->is_xla();
452
+ }
453
+
454
+ /// Returns if a `Tensor` has MTIA backend.
455
+ bool is_mtia() const {
456
+ return impl_->is_mtia();
457
+ }
458
+
459
+ /// Returns if a `Tensor` has HPU backend.
460
+ bool is_hpu() const {
461
+ return impl_->is_hpu();
462
+ }
463
+
464
+ /// Returns if a `Tensor` has Lazy backend.
465
+ bool is_lazy() const {
466
+ return impl_->is_lazy();
467
+ }
468
+
469
+ /// Returns if a `Tensor` has HIP backend.
470
+ bool is_hip() const {
471
+ // NB: this is not a native function to avoid dispatching overhead.
472
+ return impl_->is_hip();
473
+ }
474
+
475
+ /// Returns if a `Tensor` has VE backend.
476
+ bool is_ve() const {
477
+ // NB: this is not a native function to avoid dispatching overhead.
478
+ return impl_->is_ve();
479
+ }
480
+
481
+ /// Returns if a `Tensor` has PrivateUse1 backend.
482
+ bool is_privateuseone() const {
483
+ // NB: this is not a native function to avoid dispatching overhead.
484
+ return impl_->is_privateuseone();
485
+ }
486
+
487
+ /// Returns if a `Tensor` has sparse backend.
488
+ bool is_sparse() const {
489
+ // NB: this is not a native function to avoid dispatching overhead.
490
+ return impl_->is_sparse();
491
+ }
492
+
493
+ /// Returns is a `Tensor` has a sparse CSR backend.
494
+ bool is_sparse_csr() const {
495
+ // NB: this is not a native function to avoid dispatching overhead.
496
+ return impl_->is_sparse_csr();
497
+ }
498
+
499
+ /// Returns if a `Tensor` is mkldnn tensor.
500
+ bool is_mkldnn() const {
501
+ // NB: this is not a native function to avoid dispatching overhead.
502
+ return impl_->is_mkldnn();
503
+ }
504
+
505
+ /// Returns if a `Tensor` is mps tensor.
506
+ bool is_mps() const {
507
+ // NB: this is not a native function to avoid dispatching overhead.
508
+ return impl_->is_mps();
509
+ }
510
+
511
+ /// Returns if a `Tensor` is maia tensor.
512
+ bool is_maia() const {
513
+ // NB: this is not a native function to avoid dispatching overhead.
514
+ return impl_->is_maia();
515
+ }
516
+
517
+ /// Returns if a `Tensor` is vulkan tensor.
518
+ bool is_vulkan() const {
519
+ // NB: this is not a native function to avoid dispatching overhead.
520
+ return impl_->is_vulkan();
521
+ }
522
+
523
+ /// Returns if a `Tensor` is metal tensor.
524
+ bool is_metal() const {
525
+ // NB: this is not a native function to avoid dispatching overhead.
526
+ return impl_->is_metal();
527
+ }
528
+
529
+ /// Returns if a `Tensor` has quantized backend.
530
+ bool is_quantized() const {
531
+ // NB: this is not a native function to avoid dispatching overhead.
532
+ return impl_->is_quantized();
533
+ }
534
+
535
+ /// Returns if a `Tensor` is a meta tensor. Meta tensors can
536
+ /// also have other designations.
537
+ bool is_meta() const {
538
+ return impl_->is_meta();
539
+ }
540
+
541
+ /// Returns if a `Tensor` is an inference tensor.
542
+ bool is_inference() const {
543
+ return impl_->is_inference();
544
+ }
545
+
546
+ // Returns if a `Tensor` is a NestedTensor.
547
+ bool is_nested() const {
548
+ return impl_->is_nested();
549
+ }
550
+
551
+ /// If a tensor is a quantized tensor, returns its quantizer
552
+ /// TODO: it's not in native_functions.yaml yet as it's not exposed to python
553
+ QuantizerPtr quantizer() const;
554
+
555
+ /// Returns if a `Tensor` has any dimension names
556
+ bool has_names() const {
557
+ // If a user is using unnamed tensors, then we can short-circuit right here.
558
+ // Otherwise, impl::has_names attempts to retrieve names.
559
+ if (!impl_->has_named_tensor_meta()) {
560
+ return false;
561
+ }
562
+ return impl::has_names(unsafeGetTensorImpl());
563
+ }
564
+
565
+ /// Returns a `Tensor`'s dimension names data structure
566
+ const NamedTensorMeta* get_named_tensor_meta() const {
567
+ return static_cast<NamedTensorMeta*>(impl_->named_tensor_meta());
568
+ }
569
+
570
+ NamedTensorMeta* get_named_tensor_meta() {
571
+ return static_cast<NamedTensorMeta*>(impl_->named_tensor_meta());
572
+ }
573
+
574
+ /// Returns the `TensorOptions` corresponding to this `Tensor`. Defined in
575
+ /// TensorOptions.h.
576
+ TensorOptions options() const {
577
+ return TensorOptions().dtype(dtype())
578
+ .device(device())
579
+ .layout(layout());
580
+ }
581
+
582
+ const void* const_data_ptr() const {
583
+ return this->unsafeGetTensorImpl()->data();
584
+ }
585
+
586
+ void* mutable_data_ptr() const {
587
+ return this->unsafeGetTensorImpl()->mutable_data();
588
+ }
589
+
590
+ // TODO(#97856) Make this return a const pointer. This currently
591
+ // returns a non-const pointer because of the large
592
+ // number of clients that we still want to audit before
593
+ // migrating to mutable_data_ptr().
594
+ void* data_ptr() const {
595
+ return mutable_data_ptr();
596
+ }
597
+
598
+ template <typename T, std::enable_if_t<!std::is_const_v<T>, int> = 0>
599
+ const T* const_data_ptr() const;
600
+
601
+ template <typename T, std::enable_if_t<std::is_const_v<T>, int> = 0>
602
+ const std::remove_const_t<T>* const_data_ptr() const;
603
+
604
+ template <typename T>
605
+ T* mutable_data_ptr() const;
606
+
607
+ // Legacy interface during the migration to indicate that a callsite
608
+ // has not been audited for mutability.
609
+ //
610
+ // Do not add new uses of this, use const_data_ptr() if possible,
611
+ // mutable_data_ptr() otherwise.
612
+ //
613
+ // TODO(#97856) Make this return a const pointer. This is currently
614
+ // const because of the vast number of clients that
615
+ // rely on this.
616
+ template <typename T>
617
+ T* data_ptr() const;
618
+
619
+ // Purposely not defined here to avoid inlining
620
+ void print() const;
621
+
622
+ // Return a `TensorAccessor` for CPU `Tensor`s. You have to specify scalar type and
623
+ // dimension.
624
+ template<typename T, size_t N>
625
+ TensorAccessor<T,N> accessor() const& {
626
+ static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr<T>()");
627
+ TORCH_CHECK(dim() == N, "TensorAccessor expected ", N, " dims but tensor has ", dim());
628
+ T* ptr = nullptr;
629
+ if constexpr (std::is_const_v<T>) {
630
+ ptr = const_data_ptr<T>();
631
+ } else {
632
+ ptr = mutable_data_ptr<T>();
633
+ }
634
+ return TensorAccessor<T,N>(ptr,sizes().data(),strides().data());
635
+ }
636
+ template<typename T, size_t N>
637
+ TensorAccessor<T,N> accessor() && = delete;
638
+
639
+ // Return a `GenericPackedTensorAccessor` for CUDA `Tensor`s. You have to specify scalar type and
640
+ // dimension. You can optionally specify RestrictPtrTraits as a template parameter to
641
+ // cast the data pointer to a __restrict__ pointer.
642
+ // In order to use this, your CUDA kernel has to take a corresponding GenericPackedTensorAccessor
643
+ // as an argument.
644
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
645
+ GenericPackedTensorAccessor<T,N,PtrTraits,index_t> generic_packed_accessor() const& {
646
+ static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr<T>()");
647
+ TORCH_CHECK(dim() == N, "TensorAccessor expected ", N, " dims but tensor has ", dim());
648
+ T* ptr = nullptr;
649
+ if constexpr (std::is_const_v<T>) {
650
+ ptr = const_data_ptr<T>();
651
+ } else {
652
+ ptr = mutable_data_ptr<T>();
653
+ }
654
+ return GenericPackedTensorAccessor<T,N,PtrTraits,index_t>(static_cast<typename PtrTraits<T>::PtrType>(ptr),sizes().data(),strides().data());
655
+ }
656
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits, typename index_t = int64_t>
657
+ GenericPackedTensorAccessor<T,N> generic_packed_accessor() && = delete;
658
+
659
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits>
660
+ PackedTensorAccessor32<T,N,PtrTraits> packed_accessor32() const& {
661
+ TORCH_CHECK(
662
+ impl_->numel() <=
663
+ static_cast<int64_t>(std::numeric_limits<int32_t>::max()),
664
+ "numel needs to be smaller than int32_t max; otherwise, please use packed_accessor64");
665
+ return generic_packed_accessor<T,N,PtrTraits,int32_t>();
666
+ }
667
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits>
668
+ PackedTensorAccessor32<T,N,PtrTraits> packed_accessor32() && = delete;
669
+
670
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits>
671
+ PackedTensorAccessor64<T,N,PtrTraits> packed_accessor64() const& {
672
+ return generic_packed_accessor<T,N,PtrTraits,int64_t>();
673
+ }
674
+ template<typename T, size_t N, template <typename U> class PtrTraits = DefaultPtrTraits>
675
+ PackedTensorAccessor64<T,N,PtrTraits> packed_accessor64() && = delete;
676
+
677
+ // ~~~~~ Autograd API ~~~~~
678
+
679
+ /// \fn bool is_leaf() const;
680
+ ///
681
+ /// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention.
682
+ ///
683
+ /// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were
684
+ /// created by the user. This means that they are not the result of an operation and so
685
+ /// `grad_fn()` is `nullptr`.
686
+ ///
687
+ /// Only leaf Tensors will have their `grad()` populated during a call to `backward()`.
688
+ /// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`.
689
+ ///
690
+ /// Example:
691
+ /// @code
692
+ /// auto a = torch::rand(10, torch::requires_grad());
693
+ /// std::cout << a.is_leaf() << std::endl; // prints `true`
694
+ ///
695
+ /// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA);
696
+ /// std::cout << b.is_leaf() << std::endl; // prints `false`
697
+ /// // b was created by the operation that cast a cpu Tensor into a cuda Tensor
698
+ ///
699
+ /// auto c = torch::rand(10, torch::requires_grad()) + 2;
700
+ /// std::cout << c.is_leaf() << std::endl; // prints `false`
701
+ /// // c was created by the addition operation
702
+ ///
703
+ /// auto d = torch::rand(10).cuda();
704
+ /// std::cout << d.is_leaf() << std::endl; // prints `true`
705
+ /// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
706
+ ///
707
+ /// auto e = torch::rand(10).cuda().requires_grad_();
708
+ /// std::cout << e.is_leaf() << std::endl; // prints `true`
709
+ /// // e requires gradients and has no operations creating it
710
+ ///
711
+ /// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true));
712
+ /// std::cout << f.is_leaf() << std::endl; // prints `true`
713
+ /// // f requires grad, has no operation creating it
714
+ /// @endcode
715
+
716
+ /// \fn void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const;
717
+ ///
718
+ /// Computes the gradient of current tensor with respect to graph leaves.
719
+ ///
720
+ /// The graph is differentiated using the chain rule. If the tensor is
721
+ /// non-scalar (i.e. its data has more than one element) and requires
722
+ /// gradient, the function additionally requires specifying ``gradient``.
723
+ /// It should be a tensor of matching type and location, that contains
724
+ /// the gradient of the differentiated function w.r.t. this Tensor.
725
+ ///
726
+ /// This function accumulates gradients in the leaves - you might need to
727
+ /// zero them before calling it.
728
+ ///
729
+ /// \param gradient Gradient w.r.t. the
730
+ /// tensor. If it is a tensor, it will be automatically converted
731
+ /// to a Tensor that does not require grad unless ``create_graph`` is True.
732
+ /// None values can be specified for scalar Tensors or ones that
733
+ /// don't require grad. If a None value would be acceptable then
734
+ /// this argument is optional.
735
+ /// \param retain_graph If ``false``, the graph used to compute
736
+ /// the grads will be freed. Note that in nearly all cases setting
737
+ /// this option to True is not needed and often can be worked around
738
+ /// in a much more efficient way. Defaults to the value of
739
+ /// ``create_graph``.
740
+ /// \param create_graph If ``true``, graph of the derivative will
741
+ /// be constructed, allowing to compute higher order derivative
742
+ /// products. Defaults to ``false``.
743
+ /// \param inputs Inputs w.r.t. which the gradient will be accumulated into
744
+ /// ``at::Tensor::grad``. All other Tensors will be ignored. If not
745
+ /// provided, the gradient is accumulated into all the leaf Tensors
746
+ /// that were used to compute the current tensor.
747
+ /// When inputs are provided and a given input is not a leaf,
748
+ /// the current implementation will call its grad_fn (even though it is not strictly needed to get this gradients).
749
+ /// It is an implementation detail on which the user should not rely.
750
+ /// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
751
+
752
+ /// \fn Tensor detach() const;
753
+ ///
754
+ /// Returns a new Tensor, detached from the current graph.
755
+ /// The result will never require gradient.
756
+
757
+ /// \fn Tensor & detach_() const;
758
+ ///
759
+ /// Detaches the Tensor from the graph that created it, making it a leaf.
760
+ /// Views cannot be detached in-place.
761
+
762
+ /// \fn void retain_grad() const;
763
+ ///
764
+ /// Enables this Tensor to have their :attr:`grad` populated during
765
+ /// :func:`backward`. This is a no-op for leaf tensors.
766
+
767
+ /// \fn bool retains_grad() const;
768
+ ///
769
+ /// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be
770
+ /// populated during :func:`backward`, ``false`` otherwise.
771
+
772
+ const TensorBase& set_requires_grad(bool requires_grad) const {
773
+ impl_->set_requires_grad(requires_grad);
774
+ return *this;
775
+ }
776
+ bool requires_grad() const {
777
+ return impl_->requires_grad();
778
+ }
779
+
780
+ // The Forward AD API functions below are low level and are not to be used by end
781
+ // users who should use the API provided in torch/csrc/autograd.h
782
+
783
+ /// This function returns the forward gradient for this Tensor at the given level.
784
+ const Tensor& _fw_grad(uint64_t level) const {
785
+ return impl_->_fw_grad(level, *this);
786
+ }
787
+
788
+ /// This function can be used to set the value of the forward grad.
789
+ /// Note that the given new_grad might not be used directly if it has different
790
+ /// metadata (size/stride/storage offset) compared to this Tensor. In that case,
791
+ /// new_grad content will be copied into a new Tensor
792
+ void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const {
793
+ impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op);
794
+ }
795
+
796
+ /// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended
797
+ /// to be used from functions that need to access the `Variable`'s equivalent `Tensor`
798
+ /// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`).
799
+ ///
800
+ /// One notable difference with the legacy `.data()` function is that changes to the
801
+ /// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset)
802
+ /// will not update the original `Variable`, due to the fact that this function
803
+ /// shallow-copies the `Variable`'s underlying TensorImpl.
804
+ at::TensorBase tensor_data() const;
805
+
806
+ /// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data`
807
+ /// in Python, which create a new `Variable` that shares the same storage and
808
+ /// tensor metadata with the original `Variable`, but with a completely new
809
+ /// autograd history.
810
+ ///
811
+ /// NOTE: If we change the tensor metadata (e.g. sizes / strides /
812
+ /// storage / storage_offset) of a variable created from `var.variable_data()`, those
813
+ /// changes will not update the original variable `var`. In `.variable_data()`, we set
814
+ /// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal,
815
+ /// in order to prevent users from changing metadata of `var.variable_data()`
816
+ /// and expecting the original variable `var` to also be updated.
817
+ at::TensorBase variable_data() const;
818
+
819
+ // Gradient Node and Edges
820
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
821
+
822
+ /// Gets the gradient function of the `Variable`. If this is a leaf variable,
823
+ /// the pointer returned will be null.
824
+ ///
825
+ /// For View Variables:
826
+ /// Gets the up-to-date grad_fn. If the shared data or base was modified, we
827
+ /// re-create the grad_fn to express the up-to-date view relationship between
828
+ /// this and the base Variable.
829
+ const std::shared_ptr<torch::autograd::Node>& grad_fn() const;
830
+
831
+ // Hooks
832
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
833
+
834
+ template <typename T>
835
+ using hook_return_void_t = std::enable_if_t<std::is_void_v<typename std::invoke_result_t<T&, TensorBase>>, unsigned>;
836
+ template <typename T>
837
+ using hook_return_var_t = std::enable_if_t<std::is_same_v<typename std::invoke_result_t<T&, TensorBase>, TensorBase>, unsigned>;
838
+
839
+ /// Registers a backward hook.
840
+ ///
841
+ /// The hook will be called every time a gradient with respect to the Tensor is computed.
842
+ /// The hook should have one of the following signatures:
843
+ /// ```
844
+ /// hook(TensorBase grad) -> TensorBase
845
+ /// ```
846
+ /// ```
847
+ /// hook(TensorBase grad) -> void
848
+ /// ```
849
+ /// The hook should not modify its argument, but it can optionally return a new gradient
850
+ /// which will be used in place of `grad`.
851
+ ///
852
+ /// This function returns the index of the hook in the list which can be used to remove hook.
853
+ ///
854
+ /// Example:
855
+ /// @code
856
+ /// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad());
857
+ /// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient
858
+ /// v.backward(torch::tensor({1., 2., 3.}));
859
+ /// // This prints:
860
+ /// // ```
861
+ /// // 2
862
+ /// // 4
863
+ /// // 6
864
+ /// // [ CPUFloatType{3} ]
865
+ /// // ```
866
+ /// std::cout << v.grad() << std::endl;
867
+ /// v.remove_hook(h); // removes the hook
868
+ /// @endcode
869
+ template <typename T>
870
+ hook_return_void_t<T> register_hook(T&& hook) const;
871
+ template <typename T>
872
+ hook_return_var_t<T> register_hook(T&& hook) const;
873
+
874
+ protected:
875
+ unsigned _register_hook(std::function<TensorBase(const TensorBase&)> hook) const;
876
+
877
+ public:
878
+
879
+ /// Remove hook at given position
880
+ void remove_hook(unsigned pos) const;
881
+
882
+ // Variable methods
883
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
884
+
885
+ bool is_leaf() const;
886
+
887
+ int64_t output_nr() const;
888
+
889
+ void set_data(const TensorBase & new_data) const;
890
+
891
+ TensorBase data() const;
892
+
893
+ int64_t _version() const;
894
+
895
+ void retain_grad() const;
896
+
897
+ bool retains_grad() const;
898
+
899
+ const TensorBase& requires_grad_(bool _requires_grad=true) const;
900
+
901
+ // View Variables
902
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
903
+
904
+ /// Returns true if this `Variable` is a view of another `Variable`.
905
+ bool is_view() const;
906
+
907
+ /// Returns the `Variable` that this `Variable` is a view of. If this
908
+ /// `Variable` is not a view, throw a `std::runtime_error`.
909
+ const TensorBase& _base() const;
910
+
911
+ // Miscellaneous
912
+ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
913
+
914
+ const std::string& name() const;
915
+
916
+ protected:
917
+ void enforce_invariants();
918
+ c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> impl_;
919
+
920
+ private:
921
+ TensorBase __dispatch_contiguous(c10::MemoryFormat) const;
922
+ };
923
+
924
+ inline DeviceIndex get_device(const TensorBase& self) {
925
+ return self.get_device();
926
+ }
927
+
928
+ template <typename T>
929
+ auto TensorBase::register_hook(T&& hook) const -> TensorBase::hook_return_void_t<T> {
930
+ // Hooks with a void return type still need to produce a value for the
+ // std::function<TensorBase(const TensorBase&)> we store, so wrap them
+ // and return a default-constructed (undefined) TensorBase.
932
+ static_assert(std::is_same_v<decltype(hook(TensorBase())), void>,
933
+ "Expected hook to return void");
934
+ return _register_hook([fn=std::forward<T>(hook)](const TensorBase& grad) {
935
+ fn(grad);
936
+ return TensorBase();
937
+ });
938
+ }
939
+
940
+ template <typename T>
941
+ auto TensorBase::register_hook(T&& hook) const -> TensorBase::hook_return_var_t<T> {
942
+ return _register_hook(std::forward<T>(hook));
943
+ }
944
+
945
+ namespace detail {
946
+ // Helper creator for the Tensor class which doesn't require the user to
+ // pass in an intrusive_ptr; instead it converts the arguments passed in
+ // to the requested intrusive_ptr type.
949
+ template <typename T, typename... Args>
950
+ TensorBase make_tensor_base(Args&&... args) {
951
+ return TensorBase(c10::make_intrusive<T>(std::forward<Args>(args)...));
952
+ }
953
+
954
+ } // namespace detail
955
+
956
+ inline DispatchKey legacyExtractDispatchKey(const TensorBase& t) {
957
+ return legacyExtractDispatchKey(t.key_set());
958
+ }
959
+
960
+ } // namespace at
961
+
962
+ namespace c10 {
963
+ template <>
964
+ struct MaybeOwnedTraits<at::TensorBase> {
965
+ using owned_type = at::TensorBase;
966
+ using borrow_type = at::TensorBase;
967
+
968
+ static borrow_type createBorrow(const owned_type& from) {
969
+ // NOTE: this can be implemented without the special
970
+ // unsafe_borrow_t Tensor constructor as
971
+ //
972
+ // return borrow_type(c10::intrusive_ptr<at::TensorImpl, at::UndefinedTensorImpl>::reclaim(from.unsafeGetTensorImpl()));
973
+ //
974
+ // but that hurts inlining due to the nullptr check in the
975
+ // Tensor(c10::intrusive_ptr<...>) constructor. We already know
976
+ // that from.impl_ isn't null because from is a valid Tensor, so
977
+ // we needn't do the check again. (using __builtin_assume can
978
+ // avoid this, but wouldn't be portable to MSVC.)
979
+ return borrow_type(borrow_type::unsafe_borrow_t{}, from);
980
+ }
981
+
982
+ static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) {
983
+ lhs.unsafeReleaseTensorImpl();
984
+ // See above note: this can be implemented with public API
985
+ // similarly to createBorrow(), but that would hurt inlining.
986
+ lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs);
987
+ }
988
+
989
+ static void destroyBorrow(borrow_type& toDestroy) {
990
+ toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0.
991
+ }
992
+
993
+ static const owned_type& referenceFromBorrow(const borrow_type& borrow) {
994
+ return borrow;
995
+ }
996
+
997
+ static const owned_type* pointerFromBorrow(const borrow_type& borrow) {
998
+ return &borrow;
999
+ }
1000
+
1001
+ static bool debugBorrowIsValid(const borrow_type& /*borrow*/) {
1002
+ return true;
1003
+ }
1004
+ };
1005
+
1006
+ template <>
1007
+ struct ExclusivelyOwnedTraits<at::TensorBase> : public c10::ExclusivelyOwnedTensorTraits<at::TensorBase> {};
1008
+ } // namespace c10
1009
+
1010
+ namespace at {
1011
+
1012
+ inline c10::MaybeOwned<TensorBase> borrow_from_optional_tensor(
1013
+ const std::optional<TensorBase>& opt) {
1014
+ return opt.has_value()
1015
+ ? c10::MaybeOwned<TensorBase>::borrowed(*opt)
1016
+ : c10::MaybeOwned<TensorBase>::owned(std::in_place);
1017
+ }
1018
+
1019
+ inline c10::MaybeOwned<TensorBase> TensorBase::expect_contiguous(MemoryFormat memory_format) const & {
1020
+ if (is_contiguous(memory_format)) {
1021
+ return c10::MaybeOwned<TensorBase>::borrowed(*this);
1022
+ } else {
1023
+ return c10::MaybeOwned<TensorBase>::owned(__dispatch_contiguous(memory_format));
1024
+ }
1025
+ }
1026
+
1027
+ namespace symint {
1028
+
1029
+ template <typename T>
1030
+ using enable_if_symint = std::enable_if_t<std::is_same_v<T, c10::SymInt>>;
1031
+ template <typename T>
1032
+ using enable_if_int = std::enable_if_t<std::is_same_v<T, int64_t>>;
1033
+
1034
+ template <typename T, typename = enable_if_symint<T>>
1035
+ c10::SymIntArrayRef sizes(const TensorBase& t) { return t.sym_sizes(); }
1036
+ template <typename T, typename = enable_if_int<T>>
1037
+ IntArrayRef sizes(const TensorBase& t) { return t.sizes(); }
1038
+
1039
+ template <typename T, typename = enable_if_symint<T>>
1040
+ c10::SymInt size(const TensorBase& t, int64_t dim) { return t.sym_size(dim); }
1041
+ template <typename T, typename = enable_if_int<T>>
1042
+ int64_t size(const TensorBase& t, int64_t dim) { return t.size(dim); }
1043
+
1044
+ template <typename T, typename = enable_if_symint<T>>
1045
+ c10::SymIntArrayRef strides(const TensorBase& t) { return t.sym_strides(); }
1046
+ template <typename T, typename = enable_if_int<T>>
1047
+ IntArrayRef strides(const TensorBase& t) { return t.strides(); }
1048
+
1049
+ template <typename T, typename = enable_if_symint<T>>
1050
+ c10::SymInt numel(const TensorBase& t) { return t.sym_numel(); }
1051
+ template <typename T, typename = enable_if_int<T>>
1052
+ int64_t numel(const TensorBase& t) { return t.numel(); }
1053
+
1054
+ } // namespace symint
1055
+
1056
+ } // namespace at
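The `MaybeOwned` plumbing above is easiest to see in use. A minimal sketch, assuming only the declarations in this header (the function names `first_stride` and `defined_or_empty` are illustrative, not part of the upload):

```
#include <ATen/core/TensorBase.h>
#include <optional>

// expect_contiguous borrows when `t` is already contiguous and only
// materializes an owned contiguous copy otherwise (see definition above).
int64_t first_stride(const at::TensorBase& t) {
  c10::MaybeOwned<at::TensorBase> c = t.expect_contiguous();
  return c->strides()[0];  // assumes t has at least one dimension
}

// borrow_from_optional_tensor avoids a refcount bump when a value is present.
bool defined_or_empty(const std::optional<at::TensorBase>& opt) {
  c10::MaybeOwned<at::TensorBase> mb = at::borrow_from_optional_tensor(opt);
  return mb->defined();
}
```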
phivenv/Lib/site-packages/torch/include/ATen/core/TensorBody.h ADDED
The diff for this file is too large to render. See raw diff
 
phivenv/Lib/site-packages/torch/include/ATen/core/TorchDispatchUtils.h ADDED
@@ -0,0 +1,17 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/dispatch/Dispatcher.h>
4
+ #include <c10/core/impl/TorchDispatchModeTLS.h>
5
+ #include <c10/util/ArrayRef.h>
6
+ #include <torch/library.h>
7
+ #include <optional>
8
+
9
+ namespace at::impl {
10
+
11
+ TORCH_API bool tensor_has_dispatch(const at::Tensor& t);
12
+ TORCH_API bool tensorlist_has_dispatch(at::ITensorListRef li);
13
+ TORCH_API bool tensorlist_has_dispatch(
14
+ const c10::List<std::optional<at::Tensor>>& li);
15
+ using c10::impl::dispatch_mode_enabled;
16
+
17
+ } // namespace at::impl
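As a hedged sketch of how these helpers are typically consumed (the guard function is invented for illustration):

```
#include <ATen/core/TorchDispatchUtils.h>

// Take a fast path only when no dispatch mode is active and neither
// tensor carries a Python-dispatch key.
bool can_take_fast_path(const at::Tensor& a, const at::Tensor& b) {
  return !at::impl::dispatch_mode_enabled() &&
         !at::impl::tensor_has_dispatch(a) &&
         !at::impl::tensor_has_dispatch(b);
}
```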
phivenv/Lib/site-packages/torch/include/ATen/core/TransformationHelper.h ADDED
@@ -0,0 +1,175 @@
1
+ #include <ATen/NumericUtils.h>
2
+ #include <c10/macros/Macros.h>
3
+ #include <c10/util/Half.h>
4
+ #include <c10/util/BFloat16.h>
5
+ #include <c10/util/MathConstants.h>
6
+ #include <cmath>
7
+ #include <cstdint>
8
+ #include <cassert>
9
+ #include <limits>
10
+ #include <type_traits>
11
+
12
+ namespace at {
13
+
14
+ // Using DistAccumType in accumulate types for distributions.
15
+ // Note: Ideally we'd be using ATen/AccumulateType.h but looks
16
+ // like the there is some inconsistency in how accumulate types
17
+ // are mapped currently, e.g. for the cpu side, float is mapped
18
+ // to double.
19
+ template <typename T>
20
+ struct DistAccumType { };
21
+
22
+ #if defined(__CUDACC__) || defined(__HIPCC__)
23
+ template <> struct DistAccumType<half> { using type = float; };
24
+ #endif
25
+ template <> struct DistAccumType<BFloat16> { using type = float; };
26
+ template <> struct DistAccumType<Half> { using type = float; };
27
+ template <> struct DistAccumType<float> { using type = float; };
28
+ template <> struct DistAccumType<double> { using type = double; };
29
+
30
+ template <typename T>
31
+ using dist_acctype = typename DistAccumType<T>::type;
32
+
33
+ namespace transformation {
34
+
35
+ /**
36
+ * A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified.
37
+ * `range` is `to - from`
38
+ * `base` is `from`
39
+ */
40
+ template <typename T, typename V>
41
+ C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) {
42
+ return static_cast<T>(static_cast<int64_t>((val % range) + base));
43
+ }
44
+
45
+ /**
46
+ * A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and to=None
47
+ */
48
+ template <typename T, typename V>
49
+ C10_HOST_DEVICE inline T uniform_int_full_range(V val) {
50
+ return static_cast<T>(static_cast<int64_t>(val));
51
+ }
52
+
53
+ /**
54
+ * A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`.
55
+ * In order to prevent compiler warnings reported in GitHub issue 46391, T can't be float or double
56
+ * in this overloaded version
57
+ */
58
+ template <typename T, typename V>
59
+ C10_HOST_DEVICE inline std::enable_if_t<!(std::is_floating_point_v<T>), T>uniform_int(V val) {
60
+ if constexpr (std::is_same_v<T, bool>) {
61
+ return static_cast<bool>(val & 1);
62
+ } else if constexpr (std::is_same_v<T, int64_t>) {
63
+ return static_cast<T>(val % (static_cast<uint64_t>(std::numeric_limits<T>::max()) + 1));
64
+ } else if constexpr (std::is_same_v<T, at::Half> || std::is_same_v<T, at::BFloat16>) {
65
+ return static_cast<T>(val % static_cast<uint64_t>((1ULL << std::numeric_limits<T>::digits) + 1));
66
+ } else if constexpr (std::is_integral_v<T>) {
67
+ return static_cast<T>(val % (static_cast<uint64_t>(std::numeric_limits<T>::max()) + 1));
68
+ } else {
69
+ assert(false);
70
+ return 0;
71
+ }
72
+ }
73
+
74
+ /**
75
+ * An overloaded transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`,
76
+ * added to fix compiler warnings reported in GitHub issue 46391. T is either float or double in this version.
77
+ */
78
+ template<typename T, typename V>
79
+ C10_HOST_DEVICE inline std::enable_if_t<std::is_floating_point_v<T>, T>uniform_int(V val) {
80
+ return static_cast<T>(val % static_cast<uint64_t>((1ULL << std::numeric_limits<T>::digits) + 1));
81
+ }
82
+
83
+ template <typename T, typename V>
84
+ C10_HOST_DEVICE inline dist_acctype<T> uniform_real(V val, T from, T to) {
85
+ constexpr auto MASK = static_cast<V>((static_cast<uint64_t>(1) << std::numeric_limits<T>::digits) - 1);
86
+ constexpr auto DIVISOR = static_cast<dist_acctype<T>>(1) / (static_cast<uint64_t>(1) << std::numeric_limits<T>::digits);
87
+ dist_acctype<T> x = (val & MASK) * DIVISOR;
88
+ return (x * (to - from) + from);
89
+ }
90
+
91
+ /**
92
+ * Transforms normally distributed `val` with mean 0.0 and standard deviation 1.0 to
93
+ * normally distributed with `mean` and standard deviation `std`.
94
+ */
95
+ template <typename T>
96
+ C10_HOST_DEVICE inline T normal(T val, T mean, T std) {
97
+ return val * std + mean;
98
+ }
99
+
100
+ /**
101
+ * Transforms uniformly distributed `val` between 0.0 and 1.0 to
102
+ * Cauchy distribution with location parameter `median` and scale parameter `sigma`.
103
+ */
104
+ template <typename T>
105
+ C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) {
106
+ // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
107
+ // __tanf overflows and returns `inf/-inf` when (val > 1 - eps) or (val < 0 + eps),
108
+ // thus we clip those values.
109
+ constexpr T eps = std::numeric_limits<T>::epsilon();
110
+ constexpr T one_minus_eps = 1 - eps;
111
+ constexpr T zero_plus_eps = 0 + eps;
112
+ val = (val > one_minus_eps ? one_minus_eps : val);
113
+ val = (val < zero_plus_eps ? zero_plus_eps : val);
114
+ return median + sigma * at::tan(c10::pi<T> * (val - static_cast<T>(0.5)));
115
+ }
116
+
117
+ template <>
118
+ C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) {
119
+ // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
120
+ return median + sigma * at::tan(c10::pi<double> * (val - static_cast<double>(0.5)));
121
+ }
122
+
123
+ /**
124
+ * Transforms uniformly distributed `val` between 0.0 and 1.0 to
125
+ * exponentially distributed with `lambda` parameter of the distribution.
126
+ */
127
+ template <typename T>
128
+ C10_HOST_DEVICE inline T exponential(T val, T lambda) {
129
+ // https://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates
130
+ // Different implementations for CUDA and CPU to preserve original logic
131
+ // TODO: must be investigated and unified!!!
132
+ // https://github.com/pytorch/pytorch/issues/38662
133
+ #if defined(__CUDACC__) || defined(__HIPCC__)
134
+ // BEFORE TOUCHING THIS CODE READ: https://github.com/pytorch/pytorch/issues/16706
135
+ // curand_uniform has (0,1] bounds. log(1) is 0 and exponential excludes 0.
136
+ // we need log to be not 0, and not underflow when converted to half
137
+ // fast __logf approximation can underflow, so set log to -epsilon/2 for 1 or close to 1 args
138
+ auto log = val >= static_cast<T>(1.) - std::numeric_limits<T>::epsilon() / 2
139
+ ? -std::numeric_limits<T>::epsilon() / 2
140
+ : at::log(val);
141
+ return static_cast<T>(-1.0) / lambda * log;
142
+ #else
143
+ return static_cast<T>(-1.0) / lambda * at::log1p(-val);
144
+ #endif
145
+ }
146
+
147
+ /**
148
+ * Transforms uniformly distributed `val` between 0.0 and 1.0 to
149
+ * geometrically distributed with success probability `p`.
150
+ */
151
+ template <typename T>
152
+ C10_HOST_DEVICE inline T geometric(T val, T p) {
153
+ // https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions
154
+ return static_cast<T>(::ceil(at::log(val) / at::log1p(-p)));
155
+ }
156
+
157
+ /**
158
+ * Transforms normally distributed `val` to log-normally distributed.
159
+ */
160
+ template <typename T>
161
+ C10_HOST_DEVICE inline T log_normal(T val) {
162
+ // https://en.wikipedia.org/wiki/Log-normal_distribution#Mode,_median,_quantiles
163
+ return at::exp(val);
164
+ }
165
+
166
+ /**
167
+ * Transforms uniformly distributed `val` between 0.0 and 1.0 to
168
+ * bernoulli distributed with success probability `p`.
169
+ */
170
+ template <typename T>
171
+ C10_HOST_DEVICE inline T bernoulli(T val, T p) {
172
+ return val < p;
173
+ }
174
+
175
+ }} // namespace at::transformation
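A small sketch of how these transformations compose with a raw engine draw; `at::mt19937` is the CPU engine from MT19937RNGEngine.h in this same upload, and the two sampling functions are invented for illustration:

```
#include <ATen/core/MT19937RNGEngine.h>
#include <ATen/core/TransformationHelper.h>

float sample_uniform(at::mt19937& gen, float lo, float hi) {
  // gen() yields a raw uint32_t; uniform_real masks it down to the float
  // mantissa and rescales it into [lo, hi).
  return at::transformation::uniform_real<float>(gen(), lo, hi);
}

float sample_exponential(at::mt19937& gen, float lambda) {
  float u = at::transformation::uniform_real<float>(gen(), 0.0f, 1.0f);
  return at::transformation::exponential<float>(u, lambda);
}
```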
phivenv/Lib/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h ADDED
@@ -0,0 +1 @@
1
+ #include <c10/core/UndefinedTensorImpl.h>
phivenv/Lib/site-packages/torch/include/ATen/core/UnsafeFromTH.h ADDED
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+ #include <ATen/core/Tensor.h>
3
+
4
+ namespace at {
5
+
6
+ inline Tensor unsafeTensorFromTH(void * th_pointer, bool retain) {
7
+ auto tensor_impl = c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl>::reclaim(static_cast<TensorImpl*>(th_pointer));
8
+ if (retain && tensor_impl.get() != UndefinedTensorImpl::singleton()) {
9
+ c10::raw::intrusive_ptr::incref(tensor_impl.get());
10
+ }
11
+ return Tensor(std::move(tensor_impl));
12
+ }
13
+
14
+ inline Storage unsafeStorageFromTH(void * th_pointer, bool retain) {
15
+ if (retain && th_pointer) {
16
+ c10::raw::intrusive_ptr::incref(static_cast<StorageImpl*>(th_pointer));
17
+ }
18
+ return Storage(c10::intrusive_ptr<StorageImpl>::reclaim(static_cast<StorageImpl*>(th_pointer)));
19
+ }
20
+
21
+ }
phivenv/Lib/site-packages/torch/include/ATen/core/VariableHooksInterface.h ADDED
@@ -0,0 +1,83 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/Tensor.h>
4
+ #include <c10/macros/Export.h>
5
+
6
+ // A little explanation about why this file exists at all. We have
7
+ // a few methods on Tensor class which require access to reified access to
8
+ // AutogradMeta. In open source, this isn't a big deal: we just access
9
+ // torch/csrc/autograd/variable.h from aten/src/ATen/core/Tensor.cpp and
10
+ // we can put the definitions inline. This is because everything gets balled
11
+ // into a single dynamic library in the end.
12
+ //
13
+ // However, inside our Facebook internal version of our build system, we
14
+ // have a split between aten and torch/csrc. So we cannot simply just
15
+ // cross this boundary. "Now wait," you might say, "Why don't we just
16
+ // merge the libraries inside Facebook". Well, the problem is that there
17
+ // are some downstream applications which are at binary size limit, and
18
+ // incorporating all of the extra code from libtorch would push them
19
+ // over (admarket/adreview/service:adreviewservice, see also
20
+ // https://github.com/pytorch/pytorch/pull/29299) So if you want to do that,
21
+ // we have to fix all of the services like this.
22
+ //
23
+ // I didn't want to block eliminating Tensor-Variable on this work, so I
24
+ // had to introduce another dynamic dispatch to get to the variable
25
+ // implementations (which live in torch/csrc/autograd/variable.cpp, FYI).
26
+ //
27
+ // I also considered using our existing dynamic dispatch mechanism, c10
28
+ // dispatcher, to do this. However, (1) some of the functions on Tensor
29
+ // have weird signatures that are not supported by autograd, and (2)
30
+ // see this bug https://github.com/pytorch/pytorch/issues/30102
31
+
32
+ namespace torch::autograd {
33
+
34
+ struct Node;
35
+
36
+ } // namespace torch::autograd
37
+
38
+ namespace at::impl {
39
+
40
+ struct TORCH_API VariableHooksInterface {
41
+ virtual ~VariableHooksInterface() = default;
42
+ virtual TensorBase tensor_data(const TensorBase&) const = 0;
43
+ virtual TensorBase variable_data(const TensorBase&) const = 0;
44
+ virtual const std::shared_ptr<torch::autograd::Node>& grad_fn(
45
+ const TensorBase&) const = 0;
46
+ virtual unsigned _register_hook(
47
+ const TensorBase&,
48
+ std::function<TensorBase(const TensorBase&)> hook) const = 0;
49
+ virtual void remove_hook(const TensorBase&, unsigned pos) const = 0;
50
+ virtual bool is_view(const TensorBase&) const = 0;
51
+ virtual const TensorBase& base(const TensorBase&) const = 0;
52
+ virtual const std::string& name(const TensorBase&) const = 0;
53
+ virtual bool is_leaf(const TensorBase&) const = 0;
54
+ virtual int64_t output_nr(const TensorBase&) const = 0;
55
+ virtual void set_data(const TensorBase&, const TensorBase&) const = 0;
56
+ virtual TensorBase data(const TensorBase&) const = 0;
57
+ virtual int64_t _version(const TensorBase&) const = 0;
58
+ virtual void retain_grad(const TensorBase&) const = 0;
59
+ virtual bool retains_grad(const TensorBase&) const = 0;
60
+ virtual void _backward(
61
+ const Tensor&,
62
+ TensorList,
63
+ const std::optional<Tensor>&,
64
+ std::optional<bool>,
65
+ bool) const = 0;
66
+ virtual void requires_grad_(const TensorBase&, bool) const = 0;
67
+ virtual void basic_autograd_not_implemented_fallback(
68
+ const c10::OperatorHandle& op,
69
+ c10::DispatchKeySet dispatch_keys,
70
+ torch::jit::Stack* stack) const = 0;
71
+ };
72
+
73
+ TORCH_API void SetVariableHooks(VariableHooksInterface* hooks);
74
+ TORCH_API VariableHooksInterface* GetVariableHooks();
75
+ TORCH_API bool HasVariableHooks();
76
+
77
+ struct TORCH_API VariableHooksRegisterer {
78
+ explicit VariableHooksRegisterer(VariableHooksInterface* hooks) {
79
+ SetVariableHooks(hooks);
80
+ }
81
+ };
82
+
83
+ } // namespace at::impl
phivenv/Lib/site-packages/torch/include/ATen/core/Variadic.h ADDED
@@ -0,0 +1,92 @@
1
+ #pragma once
2
+
3
+ #include <utility>
4
+
5
+ #include <c10/util/ArrayRef.h>
6
+ #include <ATen/core/List.h>
7
+
8
+ namespace at {
9
+
10
+ // This class allows you to write variadic functions which
11
+ // call a (possibly overloaded) function on each argument,
12
+ // in order. This is most commonly used in autogenerated code,
13
+ // where it is convenient to have a function that can uniformly
14
+ // take arguments of different types. If your arguments
15
+ // are homogeneous, consider using a std::initializer_list instead.
16
+ //
17
+ // For examples of this in use, see torch/csrc/utils/variadic.h
18
+ template <typename F>
19
+ struct IterArgs {
20
+ template <typename... Args>
21
+ inline F& apply() {
22
+ return self();
23
+ }
24
+
25
+ // NB: Use perfect forwarding here, otherwise we'll make value
26
+ // copies of all arguments!
27
+ template <typename T, typename... Args>
28
+ inline F& apply(T&& arg, Args&&... args) {
29
+ self()(std::forward<T>(arg));
30
+ if (self().short_circuit()) {
31
+ return self();
32
+ } else {
33
+ return apply(std::forward<Args>(args)...);
34
+ }
35
+ }
36
+
37
+ // Here are some handy overloads which provide sensible
38
+ // defaults for container-like structures that one might
39
+ // be interested in recursing into. You can enable them
40
+ // by adding:
41
+ //
42
+ // using IterArgs<YourStructName>::operator()
43
+ //
44
+ // to your struct. These are not enabled by default because
45
+ // you may be able to process these structures more efficiently
46
+ // than handling them one-by-one.
47
+
48
+ template <typename T>
49
+ void operator()(c10::IListRef<T> args) {
50
+ for (const auto& arg : args) {
51
+ self()(arg);
52
+ if (self().short_circuit())
53
+ return;
54
+ }
55
+ }
56
+
57
+ template <typename T>
58
+ void operator()(at::ArrayRef<T> args) {
59
+ for (const auto& arg : args) {
60
+ self()(arg);
61
+ if (self().short_circuit())
62
+ return;
63
+ }
64
+ }
65
+
66
+ template <typename T>
67
+ void operator()(const torch::List<T>& args) {
68
+ for (const auto& arg : args) {
69
+ self()(arg);
70
+ if (self().short_circuit())
71
+ return;
72
+ }
73
+ }
74
+
75
+ // NB: we need to specify std::vector manually as C++ won't
76
+ // do an implicit conversion to make a template deduction go through.
77
+ template <typename T>
78
+ void operator()(const std::vector<T>& args) {
79
+ self()(at::ArrayRef<T>{args});
80
+ }
81
+
82
+ constexpr bool short_circuit() const {
83
+ return false;
84
+ }
85
+
86
+ private:
87
+ inline F& self() {
88
+ return *static_cast<F*>(this);
89
+ }
90
+ };
91
+
92
+ } // namespace at
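Following the pointer above to torch/csrc/utils/variadic.h, a representative use is counting the Tensor arguments in a variadic call; this is a sketch modeled on that file:

```
#include <ATen/core/Tensor.h>
#include <ATen/core/Variadic.h>
#include <utility>

// Visits each argument in order; only Tensor arguments bump the count.
struct CountTensors : at::IterArgs<CountTensors> {
  size_t out = 0;
  void operator()(const at::Tensor&) { ++out; }
  template <typename T>
  void operator()(const T&) {}  // every other argument type is ignored
};

template <typename... Args>
size_t count_tensors(Args&&... args) {
  return CountTensors().apply(std::forward<Args>(args)...).out;
}
```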
phivenv/Lib/site-packages/torch/include/ATen/core/Vitals.h ADDED
@@ -0,0 +1,94 @@
1
+ #pragma once
2
+ #include <ostream>
3
+ #include <sstream>
4
+ #include <unordered_map>
5
+
6
+ #include <c10/core/impl/LocalDispatchKeySet.h>
7
+
8
+ namespace at::vitals {
9
+
10
+ TORCH_API bool torchVitalEnabled();
11
+
12
+ struct TORCH_API TorchVitalAttr {
13
+ // always initialized to empty
14
+ std::string value;
15
+ template <typename T>
16
+ TorchVitalAttr& operator<<(const T& t) {
17
+ if (torchVitalEnabled()) {
18
+ std::stringstream ss;
19
+ ss << t;
20
+ value += ss.str();
21
+ }
22
+ return *this;
23
+ }
24
+
25
+ template <typename T>
26
+ void write(const T& t, bool force) {
27
+ if (force || torchVitalEnabled()) {
28
+ std::stringstream ss;
29
+ ss << t;
30
+ value = ss.str();
31
+ }
32
+ }
33
+ };
34
+
35
+ struct TORCH_API TorchVital {
36
+ std::string name;
37
+ std::unordered_map<std::string, TorchVitalAttr> attrs;
38
+
39
+ explicit TorchVital(std::string n) : name(std::move(n)) {}
40
+ TorchVital(const TorchVital&) = default;
41
+ TorchVital(TorchVital&&) = default;
42
+ TorchVital& operator=(const TorchVital&) = default;
43
+ TorchVital& operator=(TorchVital&&) = default;
44
+ TorchVital() = delete;
45
+
46
+ TorchVitalAttr& create(const std::string& attr);
47
+ TorchVitalAttr& create(const std::string& attr, bool force);
48
+ friend std::ostream& operator<<(std::ostream& os, const TorchVital& dt);
49
+
50
+ ~TorchVital();
51
+ };
52
+
53
+ std::ostream& operator<<(std::ostream& os, TorchVital const& tv);
54
+
55
+ // A way to access vitals by string names instead of by global reference.
56
+ // This enables access to vitals from the PythonAPI.
57
+ class TORCH_API APIVitals {
58
+ public:
59
+ bool vitals_enabled;
60
+
61
+ // Set any vital sign that was added to the map.
62
+ bool setVital(
63
+ const std::string& vital_name,
64
+ const std::string& attr_name,
65
+ const std::string& value,
66
+ bool force = false);
67
+ std::string readVitals();
68
+
69
+ APIVitals();
70
+
71
+ // Ensure this stays a singleton
72
+ APIVitals(APIVitals const& other) = delete;
73
+ APIVitals(APIVitals&& other) = delete;
74
+ APIVitals& operator=(const APIVitals&) = delete;
75
+ APIVitals& operator=(APIVitals&&) = delete;
76
+ ~APIVitals() = default;
77
+
78
+ private:
79
+ std::unordered_map<std::string, TorchVital> name_map_;
80
+ };
81
+
82
+ extern TORCH_API APIVitals VitalsAPI;
83
+
84
+ } // namespace at::vitals
85
+
86
+ #define TORCH_VITAL_DECLARE(name) \
87
+ TORCH_API at::vitals::TorchVital TorchVital_##name;
88
+
89
+ #define TORCH_VITAL_DEFINE(name) \
90
+ TORCH_API at::vitals::TorchVital TorchVital_##name(#name);
91
+
92
+ #define TORCH_VITAL_BASE(name) TorchVital_##name
93
+
94
+ #define TORCH_VITAL(name, attr) TORCH_VITAL_BASE(name).create(#attr)
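A hedged sketch of the macro flow; the vital name `example` and its attribute are invented for illustration:

```
#include <ATen/core/Vitals.h>

// Defines the global at::vitals::TorchVital named TorchVital_example.
TORCH_VITAL_DEFINE(example)

void record_startup() {
  // Streams into the "status" attribute; TorchVitalAttr::operator<< is a
  // no-op unless torchVitalEnabled() reports that vitals are switched on.
  TORCH_VITAL(example, status) << "initialized";
}
```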
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/BoxedKernel.h ADDED
@@ -0,0 +1,213 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/boxing/OperatorKernel.h>
4
+ #include <c10/core/DispatchKeySet.h>
5
+ #include <c10/util/intrusive_ptr.h>
6
+
7
+ namespace c10 {
8
+
9
+ struct IValue;
10
+ using Stack = std::vector<IValue>;
11
+
12
+ class OperatorHandle;
13
+ class KernelFunction;
14
+
15
+ // This kernel implements the behavior of falling through to the next available
16
+ // registered dispatch key. The implementation of this function is FAST; it is
17
+ // no overhead to fallthrough to the next key. See cpp file for some more
18
+ // implementation notes; notably, this does NOT actually go through the
19
+ // boxing/unboxing codepath.
20
+ TORCH_API void fallthrough_kernel(
21
+ OperatorKernel*,
22
+ const OperatorHandle&,
23
+ DispatchKeySet,
24
+ Stack*);
25
+
26
+ // Note [Ambiguity in AutogradOther kernel]
27
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
28
+ // This error-reporting kernel is registered to the AutogradOther entry in the
29
+ // dispatch table when there is both a CompositeImplicitAutograd kernel and a
30
+ // backend kernel for ANY backend that maps to AutogradOther. To see why
31
+ // this is necessary in the AutogradOther case, it's helpful to first see
32
+ // why everything works out fine for a backend that has a reserved Autograd
33
+ // entry (see rule 2.2 in [Note] DispatchTable computation):
34
+ //
35
+ // CPU AutogradCPU
36
+ // reg? registers with...
37
+ // -------------------------------------------------
38
+ // y Autograd registration takes precedence
39
+ // over CompositeImplicitAutograd.
40
+ // This is good, because the CPU specific backend
41
+ // implementation is more specialized and typically better;
42
+ // if we used the composite, we would bypass it.
43
+ // (NB: the Autograd key is guaranteed to exist because
44
+ // the autograd codegen requires it!)
45
+ //
46
+ // n CompositeImplicitAutograd takes precedence.
47
+ // This is also good, because the Autograd
48
+ // registration (if it exists) would try to redispatch
49
+ // to the (non-existent) CPU implementation; by
50
+ // using the composite, we ensure the operator
51
+ // actually works.
52
+ //
53
+ // As you can see, when we have a specific Autograd key (AutogradCPU), we can
54
+ // decide whether or not to use the CompositeImplicitAutograd kernel or the
55
+ // Autograd kernel based on whether or not the backend kernel exists.
56
+ //
57
+ // However, for AutogradOther (which is the catchall autograd kernel for
58
+ // everything that doesn't have a specific Autograd key), we can't do this
59
+ // trick because there isn't any unique backend to peek at to disambiguate;
60
+ // if there are some backends that have implementations they prefer Autograd,
61
+ // but unimplemented backends would prefer CompositeImplicitAutograd. Rather
62
+ // than arbitrarily pick one or the other, we just register a kernel that raises
63
+ // an error and let the user decide how to proceed.
64
+ TORCH_API void ambiguous_autogradother_kernel(
65
+ OperatorKernel*,
66
+ const OperatorHandle&,
67
+ DispatchKeySet,
68
+ Stack*);
69
+
70
+ // Note [named_not_supported_kernel]
71
+ // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
72
+ // This kernel implements reporting an error message saying that named tensor is
73
+ // not supported. This kernel doesn't rely on the Stack, and so it is special
74
+ // cased in the dispatcher to be triggered before we attempt boxing (so we can
75
+ // give a good error message in cases when boxing is not supported). When
76
+ // boxing is universally supported this can be removed.
77
+ [[noreturn]] TORCH_API void named_not_supported_kernel(
78
+ OperatorKernel*,
79
+ const OperatorHandle&,
80
+ DispatchKeySet,
81
+ Stack*);
82
+
83
+ /**
84
+ * BoxedKernel is similar to a std::function storing a boxed kernel.
85
+ */
86
+ class TORCH_API BoxedKernel final {
87
+ public:
88
+ // This is how boxed kernels are actually stored
89
+ //
90
+ // Note [Plumbing Keys Through The Dispatcher]
91
+ // Benchmarks have shown that it is expensive for the dispatcher to read from
92
+ // thread-local storage (TLS) upon every dispatch call into order to compute
93
+ // which kernel to dispatch to.
94
+ //
95
+ // To mitigate this, we've updated the calling convention inside the
96
+ // dispatcher to expect every kernel that it stores to have a first argument
97
+ // of type DispatchKeySet.
98
+ //
99
+ // What are the invariants of the DispatchKeySet when it gets passed to a
100
+ // kernel?
101
+ // - All keys to the left of the current dispatch key have been masked out.
102
+ // (e.g. a Tracing kernel that takes in the DispatchKeySet will expect the
103
+ // highest bit to be DispatchKey::Tracer)
104
+ // - All other keys that dispatcher normally would have computed through TLS +
105
+ // global state + op arguments
106
+ // are still in the set.
107
+ //
108
+ // Kernels can then opt into using this keyset to save the dispatcher from
109
+ // doing repeated work during redispatches: recalculating the highest-priority
110
+ // dispatch key, which involves reading from TLS. Instead, the kernels that
111
+ // opt in will calculate an updated DispatchKeySet directly from the old one,
112
+ // and pass the updated set directly into the dispatcher upon redispatching.
113
+ //
114
+ // This is an opt-in mechanism: Kernels can automatically opt in by setting
115
+ // the first argument in their signature to be of type DispatchKeySet. See the
116
+ // kernels in VariableTypeEverything.cpp and TraceTypeEverything.cpp for
117
+ // examples.
118
+ //
119
+ // The mechanism for optionally passing that DispatchKeySet into the kernel
120
+ // lives in make_boxed_from_unboxed_functor.h. See Note [Plumbing Keys Through
121
+ // The Dispatcher 2] for details.
122
+ using InternalBoxedKernelFunction =
123
+ void(OperatorKernel*, const OperatorHandle&, DispatchKeySet, Stack*);
124
+ // This is the public API for how boxed kernels are defined
125
+ using BoxedKernelFunction = void(const OperatorHandle&, Stack*);
126
+ using BoxedKernelFunction_withDispatchKeys =
127
+ void(const OperatorHandle&, DispatchKeySet, Stack*);
128
+
129
+ BoxedKernel();
130
+
131
+ // Fast path for dispatch to allow not touching the boxed kernel in
132
+ // the common case where unboxed is available.
133
+ bool isValid() const;
134
+ bool isFallthrough() const;
135
+
136
+ /**
137
+ * Call the function with boxed arguments.
138
+ */
139
+ void callBoxed(
140
+ const OperatorHandle& opHandle,
141
+ DispatchKeySet dispatchKeySet,
142
+ Stack* stack) const;
143
+
144
+ /**
145
+ * Create a KernelFunction from a boxed function.
146
+ *
147
+ * Example:
148
+ *
149
+ * > void boxed_func(OperatorKernel*, Stack* stack) {...}
150
+ * > BoxedFunction func = BoxedKernel::makeFromFunction<&boxed_func>();
151
+ */
152
+ template <BoxedKernelFunction* func>
153
+ static BoxedKernel makeFromFunction();
154
+
155
+ /**
156
+ * TODO: This will only be useful if we write a backend fallback that plumbs
157
+ * dispatch keys (currently there are none) See Note [Plumbing Keys Through
158
+ * The Dispatcher] for details.
159
+ */
160
+ template <BoxedKernelFunction_withDispatchKeys* func>
161
+ static BoxedKernel makeFromFunction();
162
+
163
+ /**
164
+ * Create a BoxedKernel from a boxed functor.
165
+ *
166
+ * Example:
167
+ *
168
+ * > class MyFunctor final : public c10::OperatorKernel {
169
+ * > public:
170
+ * > void operator()(const OperatorHandle&, DispatchKeySet, Stack*) {...}
171
+ * > };
172
+ * > BoxedKernel func =
173
+ * BoxedKernel::makeFromFunctor(std::make_unique<MyFunctor>());
174
+ */
175
+ template <class KernelFunctor>
176
+ static BoxedKernel makeFromFunctor(
177
+ std::unique_ptr<KernelFunctor> kernelFunctor);
178
+
179
+ static BoxedKernel makeFallthrough();
180
+ static BoxedKernel makeAmbiguousAutogradOther();
181
+ static BoxedKernel makeNamedNotSupported();
182
+
183
+ private:
184
+ friend class KernelFunction;
185
+
186
+ template <BoxedKernelFunction* func>
187
+ static void make_boxed_function(
188
+ OperatorKernel*,
189
+ const OperatorHandle& opHandle,
190
+ DispatchKeySet,
191
+ Stack* stack);
192
+
193
+ template <BoxedKernelFunction_withDispatchKeys* func>
194
+ static void make_boxed_function(
195
+ OperatorKernel*,
196
+ const OperatorHandle& opHandle,
197
+ DispatchKeySet,
198
+ Stack* stack);
199
+
200
+ explicit BoxedKernel(
201
+ std::unique_ptr<OperatorKernel> functor,
202
+ InternalBoxedKernelFunction* boxed_kernel_func);
203
+
204
+ OperatorKernel* getFunctor() const;
205
+ InternalBoxedKernelFunction* getFnPtr() const;
206
+
207
+ c10::intrusive_ptr<OperatorKernel> functor_;
208
+ InternalBoxedKernelFunction* boxed_kernel_func_;
209
+ };
210
+
211
+ } // namespace c10
212
+
213
+ #include <ATen/core/boxing/BoxedKernel_impl.h>
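To make the factory shapes above concrete, a minimal sketch of a boxed kernel wrapped via makeFromFunction (the kernel name and body are placeholders):

```
#include <ATen/core/boxing/BoxedKernel.h>

// Matches BoxedKernelFunction: arguments and results travel as IValues on
// the stack rather than as typed C++ parameters.
void log_and_noop(const c10::OperatorHandle& op, c10::Stack* stack) {
  (void)op;     // e.g. inspect op.schema() here
  (void)stack;  // e.g. rearrange IValues here
}

c10::BoxedKernel make_logger_kernel() {
  return c10::BoxedKernel::makeFromFunction<&log_and_noop>();
}
```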
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/BoxedKernel_impl.h ADDED
@@ -0,0 +1,106 @@
1
+ #pragma once
2
+
3
+ namespace c10 {
4
+
5
+ inline BoxedKernel::BoxedKernel() : functor_(), boxed_kernel_func_(nullptr) {}
6
+
7
+ inline BoxedKernel::BoxedKernel(
8
+ std::unique_ptr<OperatorKernel> functor,
9
+ InternalBoxedKernelFunction* boxed_kernel_func)
10
+ : functor_(std::move(functor)), boxed_kernel_func_(boxed_kernel_func) {}
11
+
12
+ template <BoxedKernel::BoxedKernelFunction* func>
13
+ inline void BoxedKernel::make_boxed_function(
14
+ OperatorKernel*,
15
+ const OperatorHandle& opHandle,
16
+ DispatchKeySet,
17
+ Stack* stack) {
18
+ // Note that we're dropping the DispatchKeySet argument.
19
+ // See Note [Plumbing Keys Through The Dispatcher 2] for details.
20
+ func(opHandle, stack);
21
+ }
22
+
23
+ template <BoxedKernel::BoxedKernelFunction_withDispatchKeys* func>
24
+ inline void BoxedKernel::make_boxed_function(
25
+ OperatorKernel*,
26
+ const OperatorHandle& opHandle,
27
+ DispatchKeySet ks,
28
+ Stack* stack) {
29
+ // See Note [Plumbing Keys Through The Dispatcher 2] for details.
30
+ func(opHandle, ks, stack);
31
+ }
32
+
33
+ inline bool BoxedKernel::isValid() const {
34
+ return boxed_kernel_func_ != nullptr;
35
+ }
36
+
37
+ inline bool BoxedKernel::isFallthrough() const {
38
+ return boxed_kernel_func_ == &fallthrough_kernel;
39
+ }
40
+
41
+ inline void BoxedKernel::callBoxed(
42
+ const OperatorHandle& opHandle,
43
+ DispatchKeySet dispatchKeySet,
44
+ Stack* stack) const {
45
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
46
+ boxed_kernel_func_ != nullptr,
47
+ "Tried to call BoxedKernel::callBoxed() on an uninitialized BoxedKernel.");
48
+ (*boxed_kernel_func_)(functor_.get(), opHandle, dispatchKeySet, stack);
49
+ }
50
+
51
+ template <BoxedKernel::BoxedKernelFunction* func>
52
+ inline BoxedKernel BoxedKernel::makeFromFunction() {
53
+ return BoxedKernel(
54
+ nullptr, // no functor_ object
55
+ &make_boxed_function<func>);
56
+ }
57
+
58
+ template <BoxedKernel::BoxedKernelFunction_withDispatchKeys* func>
59
+ inline BoxedKernel BoxedKernel::makeFromFunction() {
60
+ return BoxedKernel(
61
+ nullptr, // no functor_ object
62
+ &make_boxed_function<func>);
63
+ }
64
+
65
+ inline BoxedKernel BoxedKernel::makeFallthrough() {
66
+ return BoxedKernel(
67
+ nullptr, // no functor_ object
68
+ &fallthrough_kernel);
69
+ }
70
+
71
+ inline BoxedKernel BoxedKernel::makeAmbiguousAutogradOther() {
72
+ return BoxedKernel(
73
+ nullptr, // no functor_ object
74
+ &ambiguous_autogradother_kernel);
75
+ }
76
+
77
+ inline BoxedKernel BoxedKernel::makeNamedNotSupported() {
78
+ return BoxedKernel(
79
+ nullptr, // no functor_ object
80
+ &named_not_supported_kernel);
81
+ }
82
+
83
+ template <class KernelFunctor>
84
+ inline BoxedKernel BoxedKernel::makeFromFunctor(
85
+ std::unique_ptr<KernelFunctor> kernelFunctor) {
86
+ static_assert(
87
+ std::is_base_of_v<OperatorKernel, KernelFunctor>,
88
+ "Tried to call BoxedKernel::makeFromFunctor<KernelFunctor>, but the functor doesn't inherit from c10::OperatorKernel. Please have the functor inherit from it.");
89
+ return BoxedKernel(
90
+ std::move(kernelFunctor),
91
+ [](OperatorKernel* kernel,
92
+ const OperatorHandle& op,
93
+ DispatchKeySet ks,
94
+ Stack* stack) {
95
+ (*static_cast<KernelFunctor*>(kernel))(op, ks, stack);
96
+ });
97
+ }
98
+
99
+ inline OperatorKernel* BoxedKernel::getFunctor() const {
100
+ return functor_.get();
101
+ }
102
+ inline BoxedKernel::InternalBoxedKernelFunction* BoxedKernel::getFnPtr() const {
103
+ return boxed_kernel_func_;
104
+ }
105
+
106
+ } // namespace c10
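And the functor path just defined, as a sketch; `NoopFunctor` is invented for illustration and must derive from c10::OperatorKernel, as the static_assert above enforces:

```
#include <ATen/core/boxing/BoxedKernel.h>
#include <memory>

// The call operator matches the trampoline lambda in makeFromFunctor above.
struct NoopFunctor final : c10::OperatorKernel {
  void operator()(const c10::OperatorHandle&, c10::DispatchKeySet, c10::Stack*) {}
};

c10::BoxedKernel make_noop_kernel() {
  return c10::BoxedKernel::makeFromFunctor(std::make_unique<NoopFunctor>());
}
```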
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/KernelFunction.h ADDED
@@ -0,0 +1,283 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/ATen_fwd.h>
4
+ #include <ATen/core/boxing/BoxedKernel.h>
5
+ #include <ATen/core/stack.h>
6
+ #include <c10/core/DispatchKeySet.h>
7
+ #include <c10/util/TypeList.h>
8
+ #include <c10/util/intrusive_ptr.h>
9
+ #include <type_traits>
10
+
11
+ namespace c10 {
12
+
13
+ using Stack = torch::jit::Stack; // TODO Instead of this, move torch::jit::Stack
14
+ // to the c10 namespace.
15
+
16
+ class OperatorHandle;
17
+ struct OperatorKernel;
18
+ class KernelFunction;
19
+
20
+ template <typename T>
21
+ using has_symint = std::disjunction<
22
+ std::is_same<c10::SymInt, T>,
23
+ std::is_same<c10::SymIntArrayRef, T>,
24
+ std::is_same<at::OptionalSymIntArrayRef, T>,
25
+ std::is_same<std::optional<c10::SymInt>, T>>;
26
+
27
+ template <typename T>
28
+ struct remove_symint {
29
+ using type = T;
30
+ };
31
+
32
+ template <>
33
+ struct remove_symint<c10::SymInt> {
34
+ using type = int64_t;
35
+ };
36
+
37
+ template <>
38
+ struct remove_symint<at::OptionalSymIntArrayRef> {
39
+ using type = OptionalIntArrayRef;
40
+ };
41
+
42
+ template <>
43
+ struct remove_symint<c10::SymIntArrayRef> {
44
+ using type = c10::IntArrayRef;
45
+ };
46
+
47
+ template <>
48
+ struct remove_symint<std::optional<c10::SymInt>> {
49
+ using type = std::optional<int64_t>;
50
+ };
51
+
52
+ template <bool symint, typename T>
53
+ struct maybe_keep_symint final {};
54
+
55
+ template <typename T>
56
+ struct maybe_keep_symint<true, T> {
57
+ using type = T;
58
+ };
59
+
60
+ template <typename T>
61
+ struct maybe_keep_symint<false, T> {
62
+ using type = typename remove_symint<T>::type;
63
+ };
64
+
65
+ template <typename T>
66
+ using fn_has_symint = typename guts::typelist::true_for_any_type<
67
+ has_symint,
68
+ typename guts::infer_function_traits<T>::type::parameter_types>;
69
+
70
+ template <typename T>
71
+ struct fn_remove_symint;
72
+
73
+ template <typename Ret, typename... Args>
74
+ struct fn_remove_symint<Ret(Args...)> {
75
+ using type = Ret(typename remove_symint<Args>::type...);
76
+ };
77
+
78
+ /**
79
+ * KernelFunction is similar to std::function but stores a kernel function.
80
+ * You can create a KernelFunction from a boxed or unboxed
81
+ * function/functor/lambda and call it in a boxed or unboxed way. If the way it
82
+ * was created doesn't match the way it was called, it will do boxing or
83
+ * unboxing as necessary.
84
+ */
85
+ class TORCH_API KernelFunction final {
86
+ public:
87
+ using InternalBoxedKernelFunction = BoxedKernel::InternalBoxedKernelFunction;
88
+ using BoxedKernelFunction = BoxedKernel::BoxedKernelFunction;
89
+ using BoxedKernelFunction_withDispatchKeys =
90
+ BoxedKernel::BoxedKernelFunction_withDispatchKeys;
91
+
92
+ KernelFunction();
93
+
94
+ // Fast path for dispatch to allow not touching the boxed kernel in
95
+ // the common case where unboxed is available.
96
+ bool isValidUnboxed() const;
97
+ bool isValidSymUnboxed() const;
98
+ bool isValid() const;
99
+ bool isFallthrough() const;
100
+
101
+ /**
102
+ * Call the function in a boxed way.
103
+ * If the kernel function was created with an unboxed function,
104
+ * this will call an unboxing wrapper which then calls into that
105
+ * unboxed function.
106
+ *
107
+ * Example:
108
+ *
109
+ * > void boxed_func(const OperatorHandle&, Stack* stack) {...}
+ * > KernelFunction func = KernelFunction::makeFromBoxedFunction<&boxed_func>();
+ * > func.callBoxed(opHandle, dispatchKeySet, stack);
112
+ *
113
+ * Or, with an unboxed implementation:
114
+ *
115
+ * > KernelFunction func = KernelFunction::makeFromUnboxedLambda(
116
+ * > [] (Tensor a, bool b) -> Tensor {...});
117
+ * > func.callBoxed(opHandle, dispatchKeySet, stack);
118
+ */
119
+ void callBoxed(
120
+ const OperatorHandle& opHandle,
121
+ DispatchKeySet dispatchKeySet,
122
+ Stack* stack) const;
123
+
124
+ /**
125
+ * Call the function in an unboxed way.
126
+ * If the kernel function was created with a boxed function,
127
+ * this will box all inputs and then call into that boxed function.
128
+ *
129
+ * Note that this doesn't work for all types yet.
130
+ *
131
+ * Example:
132
+ *
133
+ * > KernelFunction func = KernelFunction::makeFromUnboxedLambda(
134
+ * > [] (Tensor a, bool b) -> Tensor {...});
135
+ * > Tensor result = func.call<Tensor, Tensor, bool>(opHandle, dispatchKeySet, tensor1, true);
136
+ *
137
+ * Or, with a boxed implementation:
138
+ *
139
+ * > void boxed_func(const OperatorHandle&, Stack* stack) {...}
+ * > KernelFunction func = KernelFunction::makeFromBoxedFunction<&boxed_func>();
+ * > Tensor result = func.call<Tensor, Tensor, bool>(opHandle, dispatchKeySet, tensor1, true);
142
+ */
143
+ template <class Return, class... Args>
144
+ Return call(
145
+ const OperatorHandle& opHandle,
146
+ DispatchKeySet dispatchKeySet,
147
+ Args... args) const;
148
+
149
+ /**
150
+ * Create a KernelFunction from a BoxedKernel.
151
+ */
152
+ static KernelFunction makeFromBoxedKernel(BoxedKernel boxed_fn);
153
+
154
+ /**
155
+ * Create a KernelFunction from a boxed function.
156
+ *
157
+ * Example:
158
+ *
159
+ * > void boxed_func(const OperatorHandle&, Stack* stack) {...}
160
+ * > KernelFunction func =
161
+ * KernelFunction::makeFromBoxedFunction<&boxed_func>();
162
+ */
163
+ template <BoxedKernelFunction* func>
164
+ static KernelFunction makeFromBoxedFunction();
165
+
166
+ /**
167
+ * TODO: This will only be useful if we write a backend fallback that plumbs
168
+ * dispatch keys (currently there are none) See Note [Plumbing Keys Through
169
+ * The Dispatcher] for details.
170
+ */
171
+ template <BoxedKernelFunction_withDispatchKeys* func>
172
+ static KernelFunction makeFromBoxedFunction();
173
+
174
+ /**
175
+ * Create a KernelFunction from an unboxed functor.
176
+ *
177
+ * Example:
178
+ *
179
+ * > class MyFunctor final : public c10::OperatorKernel {
180
+ * > public:
181
+ * > Tensor operator()(Tensor a, Tensor b) {...}
182
+ * > };
183
+ * > KernelFunction func =
184
+ * KernelFunction::makeFromUnboxedFunctor<MyFunctor>(std::make_unique<MyFunctor>());
185
+ */
186
+ template <bool AllowLegacyTypes = false, class KernelFunctor>
187
+ static KernelFunction makeFromUnboxedFunctor(
188
+ std::unique_ptr<OperatorKernel> kernelFunctor);
189
+
190
+ /**
191
+ * Create a KernelFunction from a boxed functor.
192
+ *
193
+ * Example:
194
+ *
195
+ * > class MyFunctor final : public c10::OperatorKernel {
196
+ * > public:
197
+ * > void operator()(const OperatorHandle&, DispatchKeySet, Stack*) {...}
198
+ * > };
199
+ * > KernelFunction func =
200
+ * KernelFunction::makeFromBoxedFunctor(std::make_unique<MyFunctor>());
201
+ */
202
+ template <class KernelFunctor>
203
+ static KernelFunction makeFromBoxedFunctor(
204
+ std::unique_ptr<KernelFunctor> kernelFunctor);
205
+
206
+ /**
207
+ * Create a KernelFunction from an unboxed function.
208
+ * This is usually better than KernelFunction::makeFromUnboxedRuntimeFunction
209
+ * because knowing the function pointer as a template argument (i.e. at
210
+ * compile time) allows the compiler to inline the function into its
211
+ * unboxing wrapper and yields better performance when calling the function.
212
+ *
213
+ * Example:
214
+ *
215
+ * > Tensor unboxed_func(Tensor a, Tensor b) {...}
216
+ * > KernelFunction func =
217
+ * KernelFunction::makeFromUnboxedFunction<decltype(unboxed_func),
218
+ * &unboxed_func>();
219
+ */
220
+ template <class FuncPtr, bool AllowLegacyTypes = false>
221
+ static KernelFunction makeFromUnboxedFunction(FuncPtr);
222
+
223
+ /**
224
+ * Create a KernelFunction from an unboxed function.
225
+ * KernelFunction::makeFromUnboxedFunction is usually a better choice than
226
+ * this if you know the function pointer at compile time, see doc comment
227
+ * there for an explanation.
228
+ *
229
+ * Example:
230
+ *
231
+ * > Tensor unboxed_func(Tensor a, Tensor b) {...}
232
+ * > KernelFunction func =
233
+ * KernelFunction::makeFromUnboxedRuntimeFunction(&unboxed_func);
234
+ */
235
+ template <bool AllowLegacyTypes = false, class FuncType>
236
+ static KernelFunction makeFromUnboxedRuntimeFunction(FuncType* func);
237
+
238
+ static KernelFunction makeFallthrough();
239
+ static KernelFunction makeAmbiguousAutogradOther();
240
+ static KernelFunction makeNamedNotSupported();
241
+
242
+ /**
243
+ * Create a KernelFunction from an unboxed lambda.
244
+ *
245
+ * Example:
246
+ *
247
+ * > KernelFunction func = KernelFunction::makeFromUnboxedLambda(
248
+ * > [] (Tensor a, bool b) -> Tensor {...});
249
+ */
250
+ template <bool AllowLegacyTypes = false, class Lambda>
251
+ static std::enable_if_t<
252
+ guts::is_stateless_lambda<std::decay_t<Lambda>>::value,
253
+ KernelFunction>
254
+ makeFromUnboxedLambda(Lambda&& lambda);
255
+ template <bool AllowLegacyTypes = false, class Lambda>
256
+ static std::enable_if_t<
257
+ !guts::is_stateless_lambda<std::decay_t<Lambda>>::value,
258
+ KernelFunction>
259
+ makeFromUnboxedLambda(Lambda&& lambda);
260
+
261
+ std::string dumpState() const;
262
+ // For testing internal invariants only
263
+ bool _equalsBoxedAndUnboxed(const KernelFunction&) const;
264
+
265
+ private:
266
+ explicit KernelFunction(
267
+ std::unique_ptr<OperatorKernel> functor,
268
+ InternalBoxedKernelFunction* boxed_kernel_func,
269
+ void* unboxed_kernel_func,
270
+ void* sym_unboxed_kernel_func);
271
+ explicit KernelFunction(
272
+ BoxedKernel boxed_fn,
273
+ void* unboxed_kernel_func,
274
+ void* sym_unboxed_kernel_func);
275
+
276
+ BoxedKernel boxed_kernel_func_;
277
+ void* unboxed_kernel_func_;
278
+ void* sym_unboxed_kernel_func_;
279
+ };
280
+
281
+ } // namespace c10
282
+
283
+ #include <ATen/core/boxing/KernelFunction_impl.h>
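Closing the loop on the doc comments above, a sketch of wrapping a stateless unboxed lambda; the resulting KernelFunction can then be invoked either boxed (callBoxed) or unboxed (call<Return, Args...>), with the dispatcher supplying the OperatorHandle and DispatchKeySet:

```
#include <ATen/core/Tensor.h>
#include <ATen/core/boxing/KernelFunction.h>

c10::KernelFunction make_negate_kernel() {
  // The lambda captures nothing, so the stateless-lambda overload of
  // makeFromUnboxedLambda is selected.
  return c10::KernelFunction::makeFromUnboxedLambda(
      [](at::Tensor a, bool flip) -> at::Tensor { return flip ? -a : a; });
}
```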
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/KernelFunction_impl.h ADDED
@@ -0,0 +1,320 @@
1
+ #include <ATen/core/boxing/impl/WrapFunctionIntoFunctor.h>
2
+ #include <ATen/core/boxing/impl/WrapFunctionIntoRuntimeFunctor.h>
3
+ #include <ATen/core/boxing/impl/boxing.h>
4
+ #include <ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h>
5
+
6
+ #include <c10/util/C++17.h>
7
+ #include <type_traits>
8
+
9
+ namespace c10 {
10
+
11
+ namespace detail {
12
+ template <typename Base, typename Child, typename... Args>
13
+ std::enable_if_t<
14
+ !std::is_array_v<Base> && !std::is_array_v<Child> &&
15
+ std::is_base_of_v<Base, Child>,
16
+ std::unique_ptr<Base>>
17
+ make_unique_base(Args&&... args) {
18
+ return std::unique_ptr<Base>(new Child(std::forward<Args>(args)...));
19
+ }
20
+ } // namespace detail
21
+
22
+ inline KernelFunction::KernelFunction()
23
+ : boxed_kernel_func_(),
24
+ unboxed_kernel_func_(nullptr),
25
+ sym_unboxed_kernel_func_(nullptr) {}
26
+
27
+ inline KernelFunction::KernelFunction(
28
+ std::unique_ptr<OperatorKernel> functor,
29
+ InternalBoxedKernelFunction* boxed_kernel_func,
30
+ void* unboxed_kernel_func,
31
+ void* sym_unboxed_kernel_func = nullptr)
32
+ : boxed_kernel_func_(std::move(functor), boxed_kernel_func),
33
+ unboxed_kernel_func_(unboxed_kernel_func),
34
+ sym_unboxed_kernel_func_(sym_unboxed_kernel_func) {}
35
+
36
+ inline KernelFunction::KernelFunction(
37
+ BoxedKernel boxed_fn,
38
+ void* unboxed_kernel_func,
39
+ void* sym_unboxed_kernel_func = nullptr)
40
+ : boxed_kernel_func_(std::move(boxed_fn)),
41
+ unboxed_kernel_func_(unboxed_kernel_func),
42
+ sym_unboxed_kernel_func_(sym_unboxed_kernel_func) {}
43
+
44
+ inline bool KernelFunction::isValidUnboxed() const {
45
+ return unboxed_kernel_func_ != nullptr;
46
+ }
47
+
48
+ inline bool KernelFunction::isValidSymUnboxed() const {
49
+ return sym_unboxed_kernel_func_ != nullptr;
50
+ }
51
+
52
+ inline bool KernelFunction::isValid() const {
53
+ return boxed_kernel_func_.isValid();
54
+ }
55
+
56
+ inline bool KernelFunction::isFallthrough() const {
57
+ return boxed_kernel_func_.isFallthrough();
58
+ }
59
+
60
+ inline void KernelFunction::callBoxed(
61
+ const OperatorHandle& opHandle,
62
+ DispatchKeySet dispatchKeySet,
63
+ Stack* stack) const {
64
+ boxed_kernel_func_.callBoxed(opHandle, dispatchKeySet, stack);
65
+ }
66
+
67
+ template <class Return, class... Args>
68
+ inline Return callUnboxedKernelFunction(
69
+ void* unboxed_kernel_func,
70
+ OperatorKernel* functor,
71
+ DispatchKeySet dispatchKeySet,
72
+ Args&&... args) {
73
+ using ActualSignature = Return(OperatorKernel*, DispatchKeySet, Args...);
74
+ ActualSignature* func =
75
+ reinterpret_cast<ActualSignature*>(unboxed_kernel_func);
76
+ return (*func)(functor, dispatchKeySet, std::forward<Args>(args)...);
77
+ }
78
+
79
+ // This template requires you to explicitly specify the argument you want to
80
+ // forward; it doesn't work if you try to deduce it
81
+ // NB: keep this in sync with cloneWithRealTypes in function_schema.cpp
82
+
83
+ template <typename T>
84
+ inline typename remove_symint<T>::type unpackSymInt(T x) {
85
+ return x;
86
+ }
87
+
88
+ template <>
89
+ inline typename remove_symint<c10::SymInt>::type unpackSymInt(c10::SymInt x) {
90
+ return x.guard_int(__FILE__, __LINE__);
91
+ }
92
+
93
+ template <>
94
+ inline typename remove_symint<c10::SymIntArrayRef>::type unpackSymInt(
95
+ c10::SymIntArrayRef x) {
96
+ return C10_AS_INTARRAYREF_SLOW(x);
97
+ }
98
+
99
+ template <>
100
+ inline typename remove_symint<std::optional<c10::SymInt>>::type unpackSymInt(
101
+ std::optional<c10::SymInt> x) {
102
+ return x.has_value() ? std::make_optional(x->guard_int(__FILE__, __LINE__))
103
+ : std::nullopt;
104
+ }
105
+
106
+ template <>
107
+ inline typename remove_symint<at::OptionalSymIntArrayRef>::type unpackSymInt(
108
+ at::OptionalSymIntArrayRef x) {
109
+ return x.has_value() ? std::make_optional(C10_AS_INTARRAYREF_SLOW(*x))
110
+ : std::nullopt;
111
+ }
112
+
113
+ template <class Return, class... Args>
114
+ C10_ALWAYS_INLINE Return KernelFunction::call(
115
+ const OperatorHandle& opHandle,
116
+ DispatchKeySet dispatchKeySet,
117
+ Args... args) const {
118
+ // note: Args above is intentionally not Args&&. We don't want perfect
119
+ // forwarding, which would require Args to be deduced, but instead we
120
+ // want callers to explicitly specify the Args.
121
+
122
+ if constexpr (std::disjunction_v<has_symint<Args>...>) {
123
+ if (sym_unboxed_kernel_func_ != nullptr) {
124
+ auto* functor = boxed_kernel_func_.getFunctor();
125
+ return callUnboxedKernelFunction<Return, Args...>(
126
+ sym_unboxed_kernel_func_,
127
+ functor,
128
+ dispatchKeySet,
129
+ std::forward<Args>(args)...);
130
+ }
131
+
132
+ if (unboxed_kernel_func_ != nullptr) {
133
+ auto* functor = boxed_kernel_func_.getFunctor();
134
+ return callUnboxedKernelFunction<
135
+ Return,
136
+ typename remove_symint<Args>::type...>(
137
+ unboxed_kernel_func_,
138
+ functor,
139
+ dispatchKeySet,
140
+ unpackSymInt<Args>(args)...);
141
+ }
142
+ } else {
143
+ if (C10_LIKELY(unboxed_kernel_func_ != nullptr)) {
144
+ auto* functor = boxed_kernel_func_.getFunctor();
145
+ return callUnboxedKernelFunction<Return, Args...>(
146
+ unboxed_kernel_func_,
147
+ functor,
148
+ dispatchKeySet,
149
+ std::forward<Args>(args)...);
150
+ }
151
+ }
152
+
153
+ return impl::BoxedKernelWrapper<Return(Args...)>::call(
154
+ boxed_kernel_func_,
155
+ opHandle,
156
+ dispatchKeySet,
157
+ std::forward<Args>(args)...);
158
+ }
159
+
160
+ inline KernelFunction KernelFunction::makeFromBoxedKernel(
161
+ BoxedKernel boxed_fn) {
162
+ return KernelFunction(
163
+ std::move(boxed_fn), nullptr); // no unboxed function pointer
164
+ }
165
+
166
+ template <KernelFunction::BoxedKernelFunction* func>
167
+ inline KernelFunction KernelFunction::makeFromBoxedFunction() {
168
+ return KernelFunction::makeFromBoxedKernel(
169
+ BoxedKernel::makeFromFunction<func>());
170
+ }
171
+
172
+ template <KernelFunction::BoxedKernelFunction_withDispatchKeys* func>
173
+ inline KernelFunction KernelFunction::makeFromBoxedFunction() {
174
+ return KernelFunction::makeFromBoxedKernel(
175
+ BoxedKernel::makeFromFunction<func>());
176
+ }
177
+
178
+ inline KernelFunction KernelFunction::makeFallthrough() {
179
+ return KernelFunction::makeFromBoxedKernel(BoxedKernel::makeFallthrough());
180
+ }
181
+
182
+ inline KernelFunction KernelFunction::makeAmbiguousAutogradOther() {
183
+ return KernelFunction::makeFromBoxedKernel(
184
+ BoxedKernel::makeAmbiguousAutogradOther());
185
+ }
186
+
187
+ inline KernelFunction KernelFunction::makeNamedNotSupported() {
188
+ return KernelFunction::makeFromBoxedKernel(
189
+ BoxedKernel::makeNamedNotSupported());
190
+ }
191
+
192
+ template <bool AllowLegacyTypes, class KernelFunctor>
193
+ inline KernelFunction KernelFunction::makeFromUnboxedFunctor(
194
+ std::unique_ptr<OperatorKernel> kernelFunctor) {
195
+ #ifndef NDEBUG
196
+ // This assertion is costly for build time so it's debug-gated.
197
+ static_assert(
198
+ guts::is_functor<KernelFunctor>::value,
199
+ "Tried to call KernelFunction::makeFromUnboxedFunctor<KernelFunctor> but the argument is not a functor.");
200
+ #endif
201
+ static_assert(
202
+ std::is_base_of_v<OperatorKernel, KernelFunctor>,
203
+ "Tried to call KernelFunction::makeFromUnboxedFunctor<KernelFunctor>, but the functor doesn't inherit from c10::OperatorKernel. Please have the functor inherit from it.");
204
+
205
+ auto* unboxed_fn = &impl::wrap_kernel_functor_unboxed<KernelFunctor>::call;
206
+ void* void_unboxed_fn = reinterpret_cast<void*>(unboxed_fn);
207
+ bool is_symint = fn_has_symint<decltype(unboxed_fn)>::value;
208
+ return KernelFunction(
209
+ std::move(kernelFunctor),
210
+ &impl::make_boxed_from_unboxed_functor<KernelFunctor, AllowLegacyTypes>::
211
+ call,
212
+ is_symint ? nullptr : void_unboxed_fn,
213
+ is_symint ? void_unboxed_fn : nullptr);
214
+ }
215
+
216
+ template <class KernelFunctor>
217
+ inline KernelFunction KernelFunction::makeFromBoxedFunctor(
218
+ std::unique_ptr<KernelFunctor> kernelFunctor) {
219
+ return KernelFunction::makeFromBoxedKernel(
220
+ BoxedKernel::makeFromFunctor(std::move(kernelFunctor)));
221
+ }
222
+
223
+ template <class FuncPtr, bool AllowLegacyTypes>
224
+ inline KernelFunction KernelFunction::makeFromUnboxedFunction(
225
+ FuncPtr func_ptr) {
226
+ static_assert(
227
+ is_compile_time_function_pointer<FuncPtr>::value,
228
+ "Tried to call KernelFunction::makeFromUnboxedFunction with an invalid parameter. It must be a function pointer created with TORCH_FN.");
229
+ static_assert(
230
+ !std::is_same_v<typename FuncPtr::FuncType, BoxedKernelFunction>,
231
+ "Tried to call KernelFunction::makeFromUnboxedFunction with a boxed function pointer. Please use KernelFunction::makeFromBoxedFunction instead.");
232
+ #if defined(__GNUC__) && defined(__SANITIZE_ADDRESS__) && !defined(__CUDACC__)
233
+ TORCH_INTERNAL_ASSERT(
234
+ FuncPtr::func_ptr() != nullptr, "Kernel function cannot be nullptr");
235
+ #else
236
+ static_assert(
237
+ FuncPtr::func_ptr() != nullptr, "Kernel function cannot be nullptr");
238
+ #endif
239
+
240
+ #if !defined(C10_MOBILE)
241
+ (void)func_ptr; // Suppress unused variable warning
242
+ return makeFromUnboxedFunctor<
243
+ AllowLegacyTypes,
244
+ typename impl::WrapFunctionIntoFunctor<FuncPtr>::type>(
245
+ detail::make_unique_base<
246
+ OperatorKernel,
247
+ typename impl::WrapFunctionIntoFunctor<FuncPtr>::type>());
248
+ #else
249
+ // On mobile, we would rather optimize for binary size than for performance,
250
+ // so let's not inline the kernel into the wrapper but use
251
+ // makeFromUnboxedRuntimeFunction instead.
252
+ return makeFromUnboxedRuntimeFunction(func_ptr.func_ptr());
253
+ #endif
254
+ }
255
+
256
+ template <bool AllowLegacyTypes, class FuncType>
257
+ inline KernelFunction KernelFunction::makeFromUnboxedRuntimeFunction(
258
+ FuncType* func) {
259
+ static_assert(
260
+ guts::is_function_type<FuncType>::value,
261
+ "Tried to call KernelFunction::makeFromUnboxedRuntimeFunction with a non-function type.");
262
+ static_assert(
263
+ !std::is_same_v<FuncType, BoxedKernelFunction>,
264
+ "Tried to call KernelFunction::makeFromUnboxedRuntimeFunction with a boxed function pointer. Please use KernelFunction::makeFromBoxedFunction instead.");
265
+ TORCH_INTERNAL_ASSERT(func != nullptr, "Kernel function cannot be nullptr");
266
+
267
+ return makeFromUnboxedFunctor<
268
+ AllowLegacyTypes,
269
+ impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<FuncType>>>(
270
+ detail::make_unique_base<
271
+ OperatorKernel,
272
+ impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<FuncType>>>(func));
273
+ }
274
+
275
+ template <bool AllowLegacyTypes, class Lambda>
276
+ inline std::enable_if_t<
277
+ guts::is_stateless_lambda<std::decay_t<Lambda>>::value,
278
+ KernelFunction>
279
+ KernelFunction::makeFromUnboxedLambda(Lambda&& lambda) {
280
+ static_assert(
281
+ guts::is_functor<std::decay_t<Lambda>>::value,
282
+ "Tried to call KernelFunction::makeFromUnboxedLambda with a non-lambda type.");
283
+
284
+ #if !defined(C10_MOBILE)
285
+ return makeFromUnboxedFunctor<
286
+ AllowLegacyTypes,
287
+ impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<Lambda>>>(
288
+ detail::make_unique_base<
289
+ OperatorKernel,
290
+ impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<Lambda>>>(
291
+ std::forward<Lambda>(lambda)));
292
+ #else
293
+ // On mobile, we would rather optimize for binary size than for performance,
294
+ // so let's not inline the kernel into the wrapper but use
295
+ // makeFromUnboxedRuntimeFunction instead.
296
+ using FuncType =
297
+ typename guts::infer_function_traits_t<std::decay_t<Lambda>>::func_type;
298
+ return makeFromUnboxedRuntimeFunction<AllowLegacyTypes, FuncType>(lambda);
299
+ #endif
300
+ }
301
+
302
+ template <bool AllowLegacyTypes, class Lambda>
303
+ inline std::enable_if_t<
304
+ !guts::is_stateless_lambda<std::decay_t<Lambda>>::value,
305
+ KernelFunction>
306
+ KernelFunction::makeFromUnboxedLambda(Lambda&& lambda) {
307
+ static_assert(
308
+ guts::is_functor<std::decay_t<Lambda>>::value,
309
+ "Tried to call KernelFunction::makeFromUnboxedLambda with a non-lambda type.");
310
+
311
+ return makeFromUnboxedFunctor<
312
+ AllowLegacyTypes,
313
+ impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<Lambda>>>(
314
+ detail::make_unique_base<
315
+ OperatorKernel,
316
+ impl::WrapFunctionIntoRuntimeFunctor<std::decay_t<Lambda>>>(
317
+ std::forward<Lambda>(lambda)));
318
+ }
319
+
320
+ } // namespace c10
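As a brief aside (not part of the vendored file): the unpackSymInt specializations above are what allow KernelFunction::call to fall back from a SymInt-typed signature to a plain-integer kernel when no sym_unboxed_kernel_func_ is registered. A tiny sketch, assuming only the declarations pulled in by KernelFunction.h and using made-up values:

#include <ATen/core/boxing/KernelFunction.h>

namespace {

void unpack_sym_int_sketch() {
  // A concrete SymInt guards to a plain int64_t so a non-SymInt kernel
  // can be called with it.
  c10::SymInt s{5};
  int64_t plain = c10::unpackSymInt<c10::SymInt>(s);
  (void)plain;

  // Arguments without SymInt in their type pass through unchanged.
  double d = c10::unpackSymInt<double>(3.0);
  (void)d;
}

} // namespace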
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/OperatorKernel.h ADDED
@@ -0,0 +1,27 @@
1
+ #pragma once
2
+ #include <c10/util/intrusive_ptr.h>
3
+
4
+ namespace c10 {
5
+
6
+ /**
7
+ * Inherit from OperatorKernel to implement a c10 kernel.
8
+ *
9
+ * Example:
10
+ * > namespace {
11
+ * > class my_kernel_cpu final : public c10::OperatorKernel {
12
+ * > public:
13
+ * > Tensor operator()(Tensor a, Tensor b) {...}
14
+ * > };
15
+ * > }
16
+ *
17
+ * The kernel class is allowed to have members but these are equivalent
18
+ * to global variables. The kernel implementation is responsible for
19
+ * preventing race conditions on them.
20
+ *
21
+ * See below for how to register this kernel with PyTorch.
22
+ */
23
+ struct TORCH_API OperatorKernel : public c10::intrusive_ptr_target {
24
+ ~OperatorKernel() override = default;
25
+ };
26
+
27
+ } // namespace c10
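For illustration only (not part of the header): a stateful kernel functor in the style of the doc comment above, turned into a KernelFunction via makeFromUnboxedFunctor. `scale_kernel` and `make_scale_kernel` are hypothetical names.

#include <ATen/core/Tensor.h>
#include <ATen/core/boxing/KernelFunction.h>
#include <memory>

namespace {

class scale_kernel final : public c10::OperatorKernel {
 public:
  explicit scale_kernel(double factor) : factor_(factor) {}
  at::Tensor operator()(const at::Tensor& a) {
    return a.mul(factor_);
  }

 private:
  double factor_; // shared across calls, so treat it like a global variable
};

c10::KernelFunction make_scale_kernel() {
  return c10::KernelFunction::makeFromUnboxedFunctor<false, scale_kernel>(
      std::make_unique<scale_kernel>(2.0));
}

} // namespace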
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/WrapFunctionIntoFunctor.h ADDED
@@ -0,0 +1,38 @@
1
+ #pragma once
2
+
3
+ #include <c10/core/CompileTimeFunctionPointer.h>
4
+
5
+ namespace c10::impl {
6
+ namespace detail {
7
+ template <class FuncPtr, class ReturnType, class ParameterList>
8
+ class WrapFunctionIntoFunctor_ {};
9
+ template <class FuncPtr, class ReturnType, class... Parameters>
10
+ class WrapFunctionIntoFunctor_<
11
+ FuncPtr,
12
+ ReturnType,
13
+ guts::typelist::typelist<Parameters...>>
14
+ final : public c10::OperatorKernel {
15
+ public:
16
+ C10_ALWAYS_INLINE decltype(auto) operator()(Parameters... args) {
17
+ return (*FuncPtr::func_ptr())(std::forward<Parameters>(args)...);
18
+ }
19
+ };
20
+ } // namespace detail
21
+
22
+ // WrapFunctionIntoFunctor: Wraps a compile time function pointer into a kernel
23
+ // functor. Since it is a compile time function pointer, many compilers can
24
+ // inline it into the wrapper and you don't get any performance overhead for
25
+ // wrapping.
26
+ template <class FuncPtr>
27
+ struct WrapFunctionIntoFunctor final {
28
+ static_assert(
29
+ c10::is_compile_time_function_pointer<FuncPtr>::value,
30
+ "WrapFunctionIntoFunctor can only wrap functions created with TORCH_FN.");
31
+ using type = detail::WrapFunctionIntoFunctor_<
32
+ FuncPtr,
33
+ typename guts::function_traits<typename FuncPtr::FuncType>::return_type,
34
+ typename guts::function_traits<
35
+ typename FuncPtr::FuncType>::parameter_types>;
36
+ };
37
+
38
+ } // namespace c10::impl
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/WrapFunctionIntoRuntimeFunctor.h ADDED
@@ -0,0 +1,41 @@
1
+ #pragma once
2
+
3
+ #include <c10/util/TypeTraits.h>
4
+
5
+ namespace c10::impl {
6
+
7
+ namespace detail {
8
+ template <class FuncType, class ReturnType, class ParameterList>
9
+ class WrapFunctionIntoRuntimeFunctor_ {};
10
+ template <class FuncType, class ReturnType, class... Parameters>
11
+ class WrapFunctionIntoRuntimeFunctor_<
12
+ FuncType,
13
+ ReturnType,
14
+ guts::typelist::typelist<Parameters...>>
15
+ final : public c10::OperatorKernel {
16
+ public:
17
+ template <class FuncType_>
18
+ explicit WrapFunctionIntoRuntimeFunctor_(FuncType_&& kernel_func)
19
+ : kernel_func_(std::forward<FuncType_>(kernel_func)) {}
20
+
21
+ decltype(auto) operator()(Parameters... args) {
22
+ return kernel_func_(std::forward<Parameters>(args)...);
23
+ }
24
+
25
+ private:
26
+ FuncType kernel_func_;
27
+ };
28
+ } // namespace detail
29
+
30
+ // WrapFunctionIntoRuntimeFunctor: Wraps any runtime functor into a functor that
31
+ // inherits from c10::OperatorKernel, so it can be used as a c10 kernel.
32
+ // This can, for example, be used for lambdas, functors or even function
33
+ // pointers. In the case of function pointers, since it is a runtime function
34
+ // pointer, there is an overhead for calling it whenever the kernel is invoked.
35
+ template <class FuncType>
36
+ using WrapFunctionIntoRuntimeFunctor = detail::WrapFunctionIntoRuntimeFunctor_<
37
+ FuncType,
38
+ typename guts::infer_function_traits_t<FuncType>::return_type,
39
+ typename guts::infer_function_traits_t<FuncType>::parameter_types>;
40
+
41
+ } // namespace c10::impl
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/boxing.h ADDED
@@ -0,0 +1,410 @@
1
+ #pragma once
2
+
3
+ // This file contains boxing (not unboxing) logic,
4
+ // i.e. how to make a vector<IValue> from a set of concrete arguments.
5
+
6
+ #include <ATen/core/ivalue.h>
7
+ #include <ATen/core/stack.h>
8
+ #include <c10/core/TensorOptions.h>
9
+
10
+ #include <ATen/core/boxing/BoxedKernel.h>
11
+
12
+ #include <c10/util/Metaprogramming.h>
13
+ #include <type_traits>
14
+
15
+ namespace c10::impl {
16
+
17
+ //
18
+ // utils
19
+ //
20
+
21
+ // is_mutable_tensor_ref
22
+ template <class T>
23
+ struct is_mutable_tensor_ref : std::false_type {};
24
+ template <>
25
+ struct is_mutable_tensor_ref<at::Tensor&> : std::true_type {};
26
+
27
+ // is_tuple_of_mutable_tensor_refs
28
+ //
29
+ template <class T, class Enable = void>
30
+ struct is_tuple_of_mutable_tensor_refs : std::false_type {};
31
+
32
+ template <class T>
33
+ struct is_tuple_of_mutable_tensor_refs<
34
+ T,
35
+ std::enable_if_t<guts::is_instantiation_of<std::tuple, T>::value, void>>
36
+ : guts::typelist::
37
+ all<is_mutable_tensor_ref, guts::typelist::from_tuple_t<T>> {};
38
+
39
+ // has_ivalue_to<T> tests the presence/absence of instance method
40
+ // IValue::to<T>()
41
+ //
42
+ template <class T, class Enable = void>
43
+ struct has_ivalue_to : std::false_type {};
44
+
45
+ template <class T>
46
+ struct ivalue_to_helper {
47
+ using type = decltype(std::declval<IValue>().template to<T>());
48
+ };
49
+ template <class T>
50
+ using ivalue_to_helper_t = typename ivalue_to_helper<T>::type;
51
+
52
+ template <class T>
53
+ struct has_ivalue_to<T, std::void_t<ivalue_to_helper_t<T>>> : std::true_type {};
54
+
55
+ //
56
+ // boxing predicates
57
+ //
58
+
59
+ // A boxable arg type is one that IValue has a constructor for.
60
+ template <typename T>
61
+ using can_box = std::disjunction<
62
+ std::is_constructible<IValue, std::decay_t<T>>,
63
+ // TensorOptions are not directly constructible into IValue,
64
+ // but torch::jit::push knows how to handle them
65
+ std::is_same<TensorOptions, std::decay_t<T>>>;
66
+
67
+ template <typename... Ts>
68
+ using can_box_all = std::conjunction<can_box<Ts>...>;
69
+
70
+ // an unboxable result is one that can be extracted from an IValue
71
+ template <typename T>
72
+ using can_unbox = std::conjunction<
73
+ std::disjunction<
74
+ has_ivalue_to<T>,
75
+ // void returns are ok
76
+ std::is_same<void, T>>,
77
+ std::negation<std::is_lvalue_reference<T>>>;
78
+
79
+ //
80
+ // boxArgs - utility for pushing unboxed args onto IValue stack
81
+ //
82
+ template <class... Args>
83
+ torch::jit::Stack boxArgs(Args... args) {
84
+ // TODO Reuse stack vector instead of allocating?
85
+ torch::jit::Stack stack;
86
+ stack.reserve(sizeof...(Args));
87
+ torch::jit::push(stack, std::forward<Args>(args)...);
88
+ return stack;
89
+ }
90
+
91
+ template <class T>
92
+ inline constexpr size_t boxed_size_one() {
93
+ static_assert(
94
+ !std::is_same_v<std::decay_t<T>, c10::TensorOptions>,
95
+ "need to patch this path to support TensorOptions passed by reference");
96
+ return 1;
97
+ }
98
+
99
+ // torch::jit::push pushes 4 values for a TensorOptions; this needs to
100
+ // be kept in sync.
101
+ template <>
102
+ inline constexpr size_t boxed_size_one<c10::TensorOptions>() {
103
+ return 4;
104
+ }
105
+
106
+ // NOTE: this could probably be simplified with C++17 fold expressions.
107
+ template <typename...>
108
+ struct BoxedSize : std::integral_constant<size_t, 0> {};
109
+ template <class T, class... Args>
110
+ struct BoxedSize<T, Args...>
111
+ : std::integral_constant<
112
+ size_t,
113
+ boxed_size_one<T>() + BoxedSize<Args...>::value> {};
114
+
115
+ template <class... Args>
116
+ static inline constexpr size_t boxed_size() {
117
+ return BoxedSize<Args...>::value;
118
+ }
119
+
120
+ template <typename T>
121
+ C10_ALWAYS_INLINE_UNLESS_MOBILE void boxToStack(IValue*& dest, T& arg) {
122
+ new (dest++) IValue(arg);
123
+ }
124
+
125
+ C10_ALWAYS_INLINE_UNLESS_MOBILE void boxToStack(
126
+ IValue*& dest,
127
+ c10::TensorOptions options) {
128
+ new (dest++) IValue(c10::typeMetaToScalarType(options.dtype()));
129
+ new (dest++) IValue(options.layout());
130
+ new (dest++) IValue(options.device());
131
+ new (dest++) IValue(options.pinned_memory());
132
+ }
133
+
134
+ inline void boxArgsToStack(IValue*&) {}
135
+
136
+ template <typename T, typename... Args>
137
+ C10_ALWAYS_INLINE_UNLESS_MOBILE void boxArgsToStack(
138
+ IValue*& dest,
139
+ T& arg,
140
+ Args&... args) {
141
+ boxToStack(dest, arg);
142
+ boxArgsToStack(dest, args...);
143
+ }
144
+
145
+ //
146
+ // PopResult is a helper class whose specializations handle popping single and
147
+ // multiple return values, respectively.
148
+ //
149
+ template <class Result>
150
+ struct PopResult final {
151
+ static Result call(Stack& stack) {
152
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
153
+ stack.size() == 1,
154
+ "Boxed kernel was expected to return one value on the stack, ",
155
+ "but instead pushed ",
156
+ stack.size(),
157
+ " values.");
158
+ return std::move(stack[0]).to<Result>();
159
+ }
160
+ };
161
+
162
+ template <class... Types>
163
+ struct PopResult<std::tuple<Types...>> final {
164
+ using Result = std::tuple<Types...>;
165
+
166
+ static Result call(Stack& stack) {
167
+ // for tuple return types, boxed kernel has pushed multiple values onto the
168
+ // stack
169
+ constexpr int RetCount = sizeof...(Types);
170
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
171
+ stack.size() == RetCount,
172
+ "Boxed kernel was expected to return ",
173
+ RetCount,
174
+ " values on the stack, ",
175
+ "but instead pushed ",
176
+ stack.size(),
177
+ " values.");
178
+ return pop_to_tuple_impl(stack, std::make_index_sequence<RetCount>());
179
+ }
180
+
181
+ private:
182
+ // note: this has been moved into its own helper only to avoid a parse error
183
+ // on `indices` otherwise. I'm sure there's an incantation that slips it past
184
+ // the parser but eh
185
+ template <size_t... indices>
186
+ static Result pop_to_tuple_impl(
187
+ Stack& stack,
188
+ std::index_sequence<indices...>) {
189
+ return std::make_tuple((std::move(stack[indices]).template to<Types>())...);
190
+ }
191
+ };
192
+
193
+ //
194
+ // BoxedKernelWrapper
195
+ //
196
+ // For a given function type FT, BoxedKernelWrapper<FT> implements
197
+ // a `call` method that
198
+ // - takes a boxed kernel and unboxed arguments as specified by FT,
199
+ // - calls `boxArgs` to box the arguments
200
+ // - calls the boxed kernel
201
+ // - unboxes and returns the result
202
+ //
203
+ // The partial specializations below handle various cases: in
204
+ // particular, not all types appearing in op signatures are supported,
205
+ // and ops returning references have nonstandard wrapper implementations.
206
+ //
207
+
208
+ // 1. The base specialization of BoxedKernelWrapper should never be
209
+ // instantiated. A "no call method defined on BoxedKernelWrapper" compile error
210
+ // means that an op signature has failed to trigger any of the partial
211
+ // specializations that follow this one.
212
+ //
213
+ template <class FuncType, class Enable = void>
214
+ struct BoxedKernelWrapper {
215
+ // The reason we're not just doing straight up static_assert(false, ...) here:
216
+ // Basically, the way to make sure a static_assert only fires if a template
217
+ // is actually instantiated (rather than every time the file is parsed) is to
218
+ // use template parameters in the expression, e.g. FuncType here. However,
219
+ // since `sizeof(FuncType) != sizeof(FuncType)` is always false, this has the
220
+ // same effect.
221
+ static_assert(
222
+ sizeof(FuncType) != sizeof(FuncType),
223
+ "Function signature contains one or more unsupported parameter and/or return types. "
224
+ "Look for a nearby error like "
225
+ "\"'call' is not a member of 'c10::impl::BoxedKernelWrapper<(your function type), void>'\" "
226
+ "- (your function type) is the unsupported signature.");
227
+ };
228
+
229
+ //
230
+ // 2. Supported signatures, other than those involving non-const Tensor refs -
231
+ // i.e., "functional" ops.
232
+ //
233
+
234
+ template <class Result, class... Args>
235
+ struct BoxedKernelWrapper<
236
+ Result(Args...),
237
+ std::enable_if_t<
238
+ can_box_all<Args...>::value && can_unbox<Result>::value &&
239
+ !is_tuple_of_mutable_tensor_refs<Result>::value,
240
+ void>> {
241
+ static Result call(
242
+ const BoxedKernel& boxed_kernel_func,
243
+ const OperatorHandle& opHandle,
244
+ DispatchKeySet dispatchKeySet,
245
+ Args... args) {
246
+ torch::jit::Stack stack = boxArgs<Args...>(std::forward<Args>(args)...);
247
+ boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
248
+
249
+ if constexpr (!std::is_same_v<void, Result>) {
250
+ // op has pushed one or more values onto the stack.
251
+ return PopResult<Result>::call(stack);
252
+ } else {
253
+ // op returns void, boxed kernel has pushed nothing onto stack.
254
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
255
+ stack.empty(),
256
+ "Boxed kernel was expected to return no values on the stack, ",
257
+ "but instead returned ",
258
+ stack.size(),
259
+ " values.");
260
+ }
261
+ }
262
+ };
263
+
264
+ //
265
+ // 3. in-place ops take a single non-const Tensor reference
266
+ // as their first argument, and return it.
267
+ //
268
+ // Note: all signatures matching this pattern are assumed to be for such ops.
269
+ // Because of this, the generated BoxedKernelWrapper specializations simply
270
+ // return the in-place argument.
271
+ //
272
+
273
+ template <class... OtherArgs>
274
+ struct BoxedKernelWrapper<
275
+ at::Tensor&(at::Tensor&, OtherArgs...),
276
+ std::enable_if_t<can_box_all<OtherArgs...>::value, void>> {
277
+ static at::Tensor& call(
278
+ const BoxedKernel& boxed_kernel_func,
279
+ const OperatorHandle& opHandle,
280
+ DispatchKeySet dispatchKeySet,
281
+ at::Tensor& outArg,
282
+ OtherArgs... otherArgs) {
283
+ torch::jit::Stack stack = boxArgs<at::Tensor&, OtherArgs...>(
284
+ outArg, std::forward<OtherArgs>(otherArgs)...);
285
+ boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
286
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
287
+ stack.size() == 1,
288
+ "Boxed kernel was expected to return a single value on the stack, ",
289
+ "but instead returned ",
290
+ stack.size(),
291
+ " values.");
292
+
293
+ return outArg;
294
+ }
295
+ };
296
+
297
+ //
298
+ // 3.5. In-process migration to make in-place ops take and return
299
+ // const references instead.
300
+ template <class... OtherArgs>
301
+ struct BoxedKernelWrapper<
302
+ const at::Tensor&(const at::Tensor&, OtherArgs...),
303
+ std::enable_if_t<can_box_all<OtherArgs...>::value, void>> {
304
+ static const at::Tensor& call(
305
+ const BoxedKernel& boxed_kernel_func,
306
+ const OperatorHandle& opHandle,
307
+ DispatchKeySet dispatchKeySet,
308
+ const at::Tensor& outArg,
309
+ OtherArgs... otherArgs) {
310
+ torch::jit::Stack stack = boxArgs(outArg, otherArgs...);
311
+ boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
312
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
313
+ stack.size() == 1,
314
+ "Boxed kernel was expected to return a single value on the stack, ",
315
+ "but instead returned ",
316
+ stack.size(),
317
+ " values.");
318
+
319
+ return outArg;
320
+ }
321
+ };
322
+
323
+ //
324
+ // 4. out of place ops that take a single non-const Tensor reference as their
325
+ // final argument, and also return it.
326
+ //
327
+ // Note: all signatures matching this pattern are assumed to be for such ops.
328
+ // This assumption permits the generated BoxedKernelWrapper specializations to
329
+ // simply return out arguments.
330
+ //
331
+ template <class FirstArg, class... RestArgs>
332
+ struct BoxedKernelWrapper<
333
+ at::Tensor&(FirstArg, RestArgs...),
334
+ std::enable_if_t<
335
+ can_box_all<FirstArg, RestArgs...>::value
336
+ // this skips over in-place kernels with a non-const Tensor
337
+ // arg at the front, so those can unambiguously trigger the
338
+ // preceding specialization.
339
+ && !is_mutable_tensor_ref<FirstArg>::value,
340
+ void>> {
341
+ static at::Tensor& call(
342
+ const BoxedKernel& boxed_kernel_func,
343
+ const OperatorHandle& opHandle,
344
+ DispatchKeySet dispatchKeySet,
345
+ FirstArg firstArg,
346
+ RestArgs... restArgs) {
347
+ torch::jit::Stack stack = boxArgs<FirstArg, RestArgs...>(
348
+ std::forward<FirstArg>(firstArg), std::forward<RestArgs>(restArgs)...);
349
+ boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
350
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
351
+ stack.size() == 1,
352
+ "Boxed kernel was expected to return a single value on the stack, ",
353
+ "but instead returned ",
354
+ stack.size(),
355
+ " values.");
356
+
357
+ // reusing restArgs after it has been forwarded here is ok because we know
358
+ // that the last element is of type `Tensor&`.
359
+ return std::get<sizeof...(RestArgs) - 1>(
360
+ std::tuple<RestArgs...>{restArgs...});
361
+ }
362
+ };
363
+
364
+ //
365
+ // 5. out of place ops that take multiple non-const Tensor references as their
366
+ // final arguments, and return them in a std::tuple.
367
+ //
368
+ // Note: all signatures matching this pattern are assumed to be for such ops.
369
+ // This assumption permits the generated BoxedKernelWrapper specializations to
370
+ // simply return the out arguments.
371
+ //
372
+ template <class Result, class... Args>
373
+ struct BoxedKernelWrapper<
374
+ Result(Args...),
375
+ std::enable_if_t<
376
+ can_box_all<Args...>::value &&
377
+ is_tuple_of_mutable_tensor_refs<Result>::value,
378
+ void>> {
379
+ static Result call(
380
+ const BoxedKernel& boxed_kernel_func,
381
+ const OperatorHandle& opHandle,
382
+ DispatchKeySet dispatchKeySet,
383
+ Args... args) {
384
+ using ArgTuple = std::tuple<Args...>;
385
+ constexpr int RetCount = std::tuple_size<Result>();
386
+
387
+ torch::jit::Stack stack = boxArgs<Args...>(std::forward<Args>(args)...);
388
+ boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
389
+ TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
390
+ stack.size() == RetCount,
391
+ "Boxed kernel was expected to return ",
392
+ RetCount,
393
+ " values on the stack, ",
394
+ "but instead returned ",
395
+ stack.size(),
396
+ " values.");
397
+
398
+ // reusing args after it has been forwarded here is ok because we know
399
+ // that the last RetCount elements are of type `Tensor&`.
400
+ auto result = guts::tuple_take<ArgTuple, -RetCount>(
401
+ ArgTuple{std::forward<Args>(args)...});
402
+ static_assert(
403
+ std::is_same_v<Result, decltype(result)>,
404
+ "The parameter list of an op returning a tuple of Tensor references "
405
+ "must end with an equal number of Tensor reference parameters.");
406
+ return result;
407
+ }
408
+ };
409
+
410
+ } // namespace c10::impl
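To make the wrapper cases above concrete, here is a small sketch, not part of the vendored header, that replays what the "functional" case (2) does by hand: box the unboxed arguments onto a stack, let a boxed kernel consume it, then pop the result. `boxed_roundtrip_sketch` is a made-up name; no real boxed kernel or OperatorHandle is involved, and the push at the end merely stands in for what a kernel would leave on the stack.

#include <ATen/core/Tensor.h>
#include <ATen/core/boxing/impl/boxing.h>

namespace {

at::Tensor boxed_roundtrip_sketch(const at::Tensor& a, int64_t n) {
  // Box the unboxed arguments, exactly as BoxedKernelWrapper::call does.
  torch::jit::Stack stack =
      c10::impl::boxArgs<const at::Tensor&, int64_t>(a, n);

  // ... a boxed kernel would pop its inputs from `stack` here and push
  // its outputs; for the sketch we pretend it returned `a` unchanged ...
  stack.clear();
  torch::jit::push(stack, a);

  // Unbox the single return value, as the non-void branch of case (2) does.
  return c10::impl::PopResult<at::Tensor>::call(stack);
}

} // namespace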
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h ADDED
@@ -0,0 +1,785 @@
1
+ #pragma once
2
+
3
+ #include <ATen/core/IListRef.h>
4
+ #include <ATen/core/boxing/OperatorKernel.h>
5
+ #include <ATen/core/ivalue.h>
6
+ #include <ATen/core/stack.h>
7
+ #include <c10/util/Metaprogramming.h>
8
+ #include <c10/util/TypeList.h>
9
+ #include <c10/util/intrusive_ptr.h>
10
+
11
+ #include <utility>
12
+
13
+ namespace c10 {
14
+
15
+ using Stack = torch::jit::Stack; // TODO Instead of this, move torch::jit::Stack
16
+ // to the c10 namespace.
17
+ class OperatorHandle;
18
+
19
+ /*
20
+ * [Note: Argument forwarding in the dispatcher]
21
+ *
22
+ * The dispatcher uses a somewhat unusual way to forward arguments through
23
+ * several layers of wrapper functions. This can be confusing because an
24
+ * experienced C++ programmer would look at this and think "oh this is supposed
25
+ * to be forwarding a universal reference but the && is missing. This is a
26
+ * bug.". It is not a bug. The common way in C++ to forward arguments is to use
27
+ * universal references:
28
+ *
29
+ * > template<class T> void func(T&& arg) { func2(std::forward<T>(arg)); }
30
+ *
31
+ * but that relies on inferring the correct reference type (i.e. value vs & vs
32
+ * &&) from the argument. In our case, we cannot rely on the argument as
33
+ * supplied by the caller, because that could infer a different reference type
34
+ * than was used in the kernel function. The correct reference type is dictated
35
+ * by the kernel signature and must be identical since we cast function pointers
36
+ * through void* pointers and mismatches would be UB. So we need a forwarding
37
+ * pattern that determines the reference type to use by looking at the
38
+ * explicitly supplied operator signature, not by looking at the argument we're
39
+ * calling it with.
40
+ *
41
+ * What does std::forward do, exactly?
42
+ * ------------------------------------
43
+ * std::forward<T>(t) is a way to cast t to the reference type supplied in T.
44
+ * Let's assume decay_t<T> == U and T is either U or some reference of U.
45
+ * - std::forward<T&>(t) will return U&, no matter what kind of reference t is.
46
+ * - std::forward<T&&>(t) will return U&&, no matter what kind of reference t
47
+ * is.
48
+ * - std::forward<T>(t) will return U&& (not U!), no matter what kind of
49
+ * reference t is.
50
+ *
51
+ * For universal references, that means that in the following function
52
+ * > template<class T> void func(T&& arg) { func2(std::forward<T>(arg)); }
53
+ *
54
+ * - when called with arg being a rvalue reference or non-reference value, T
55
+ * gets inferred to be a non-reference U, and std::forward<T>(t) will return
56
+ * U&&, correctly moving the argument.
57
+ * - when called with arg behind a lvalue reference, T gets inferred to be U&
58
+ * because that's the only way to match the signature (in C++, a type that is
59
+ * (T&)&& will collapse to T&). That means std::forward<T>(t) will return U& and
60
+ * the value will not be moved but passed on as a lvalue reference.
61
+ *
62
+ * How do we use that?
63
+ * ------------------------------------
64
+ * But std::forward can also be used outside of the common "universal
65
+ * forwarding" pattern to change reference types. So instead of following the
66
+ * common C++ pattern, we notice what std::forward<T>() actually does, and that
67
+ * is it takes a value and changes its reference to the type of reference passed
68
+ * in as T. If we don't infer T but explicitly specify it, we can use this to
69
+ * forward based on an explicitly specified reference type instead of the
70
+ * inferred argument type.
71
+ *
72
+ * This is why many of the dispatcher functions look like
73
+ * > template<class T> func(T t) { func2<T>(std::forward<T>(t)); }
74
+ * instead of the common
75
+ * > template<class T> func(T&& t) { func2(std::forward<T>(t)); }
76
+ *
77
+ * and are expected to be called by explicitly specifying the template
78
+ * parameters in a way that matches the expected operator signature at each call
79
+ * site.
80
+ */
81
+
82
+ namespace impl {
83
+ // supported_primitive_arg_types defines which primitive types we allow in
84
+ // kernel functions as arguments or returns.
85
+ // Additionally, we support lists, dicts and optionals containing these types.
86
+ using supported_primitive_arg_types = guts::typelist::typelist<
87
+ int64_t,
88
+ double,
89
+ bool,
90
+ std::string_view,
91
+ at::Tensor,
92
+ at::Scalar,
93
+ c10::QScheme,
94
+ c10::ScalarType,
95
+ c10::Device,
96
+ c10::DeviceIndex,
97
+ c10::Layout,
98
+ c10::MemoryFormat,
99
+ at::Dimname>;
100
+
101
+ // We have an unboxed functor in hand that takes C++ arguments, and
102
+ // we're building a boxed functor wrapper for it that takes IValues.
103
+ // So "outside" is boxed and "inside" is unboxed.
104
+ //
105
+ // So a valid input type is one that our boxed functor wrapper can
106
+ // unbox from an IValue into a C++ value.
107
+ //
108
+ // Whereas a valid output type is one that our wrapper can receive
109
+ // as a C++ value from the unboxed functor, and box into an IValue.
110
+
111
+ //
112
+ // assert_is_valid_input_type
113
+ // checks that T can be unboxed from an IValue into a C++ value.
114
+ //
115
+
116
+ template <class T, bool AllowDeprecatedTypes, class Enable = void>
117
+ struct assert_is_valid_input_type {
118
+ assert_is_valid_input_type() {
119
+ if constexpr (guts::typelist::contains<supported_primitive_arg_types, T>::
120
+ value) {
121
+ /* everything is ok, this is a primitive type */
122
+ } else {
123
+ /* otherwise this must be an instance of a valid custom class, since it
124
+ can only have been created via IValue(x), which ensures this. */
125
+ }
126
+ }
127
+ };
128
+
129
+ template <class T, bool AllowDeprecatedTypes>
130
+ struct assert_is_valid_input_type<std::optional<T>, AllowDeprecatedTypes>
131
+ : assert_is_valid_input_type<T, AllowDeprecatedTypes> {};
132
+
133
+ template <bool AllowDeprecatedTypes, class... Args>
134
+ struct TypeCheckHelper;
135
+
136
+ template <bool AllowDeprecatedTypes>
137
+ struct TypeCheckHelper<AllowDeprecatedTypes> {};
138
+
139
+ template <bool AllowDeprecatedTypes, class Head, class... Rest>
140
+ struct TypeCheckHelper<AllowDeprecatedTypes, Head, Rest...>
141
+ : TypeCheckHelper<AllowDeprecatedTypes, Rest...> {
142
+ assert_is_valid_input_type<Head, AllowDeprecatedTypes> check;
143
+ };
144
+
145
+ template <class... Contained, bool AllowDeprecatedTypes>
146
+ struct assert_is_valid_input_type<
147
+ std::tuple<Contained...>,
148
+ AllowDeprecatedTypes>
149
+ : TypeCheckHelper<AllowDeprecatedTypes, Contained...> {};
150
+
151
+ template <class Key, class Value, bool AllowDeprecatedTypes>
152
+ struct assert_is_valid_input_type<Dict<Key, Value>, AllowDeprecatedTypes>
153
+ : assert_is_valid_input_type<Value, AllowDeprecatedTypes> {
154
+ static_assert(
155
+ guts::typelist::contains<impl::valid_dict_key_types, Key>::value,
156
+ "You tried to register a kernel with an unsupported input type: Dict<Key, Value> where Key is invalid. We only support int64_t, double, bool, and string.");
157
+ };
158
+
159
+ template <class Key, class Value, bool AllowDeprecatedTypes>
160
+ struct assert_is_valid_input_type<
161
+ std::unordered_map<Key, Value>,
162
+ AllowDeprecatedTypes>
163
+ : assert_is_valid_input_type<Value, AllowDeprecatedTypes> {
164
+ static_assert(
165
+ AllowDeprecatedTypes,
166
+ "You tried to register a kernel with an unsupported input type: std::unordered_map<Key, Value>. Please use Dict<Key, Value> instead.");
167
+ static_assert(
168
+ guts::typelist::contains<impl::valid_dict_key_types, Key>::value,
169
+ "You tried to register a kernel with an unsupported input type: std::unordered_map<Key, Value> where Key is invalid. We only support int64_t, double, bool, and string.");
170
+ };
171
+
172
+ template <class T, bool AllowDeprecatedTypes>
173
+ struct assert_is_valid_input_type<List<T>, AllowDeprecatedTypes>
174
+ : assert_is_valid_input_type<T, AllowDeprecatedTypes> {
175
+ static_assert(
176
+ !std::is_same_v<T, at::Scalar>,
177
+ "You tried to register a kernel with an unsupported input type: List<Scalar>. Please use List<int64_t>, List<double> or Tensor instead.");
178
+ };
179
+
180
+ template <class T, bool AllowDeprecatedTypes>
181
+ struct assert_is_valid_input_type<c10::ArrayRef<T>, AllowDeprecatedTypes>
182
+ : assert_is_valid_input_type<T, AllowDeprecatedTypes> {
183
+ static_assert(
184
+ !std::is_same_v<T, at::Scalar>,
185
+ "You tried to register a kernel with an unsupported input type: ArrayRef<Scalar>. Please use List<int64_t>, List<double> or Tensor instead.");
186
+ };
187
+
188
+ template <class T, bool AllowDeprecatedTypes>
189
+ struct assert_is_valid_input_type<
190
+ c10::OptionalArrayRef<T>,
191
+ AllowDeprecatedTypes>
192
+ : assert_is_valid_input_type<T, AllowDeprecatedTypes> {
193
+ static_assert(
194
+ !std::is_same_v<T, at::Scalar>,
195
+ "You tried to register a kernel with an unsupported input type: OptionalArrayRef<Scalar>. Please use List<int64_t>, List<double> or Tensor instead.");
196
+ };
197
+
198
+ template <class T, size_t N, bool AllowDeprecatedTypes>
199
+ struct assert_is_valid_input_type<std::array<T, N>, AllowDeprecatedTypes>
200
+ : assert_is_valid_input_type<T, AllowDeprecatedTypes> {
201
+ static_assert(
202
+ !std::is_same_v<T, at::Scalar>,
203
+ "You tried to register a kernel with an unsupported input type: std::array<Scalar, N>. Please use std::array<int64_t, N> instead.");
204
+ };
205
+
206
+ template <class T, bool AllowDeprecatedTypes>
207
+ struct assert_is_valid_input_type<
208
+ T,
209
+ AllowDeprecatedTypes,
210
+ std::enable_if_t<std::is_same_v<float, T>>> {
211
+ // There is no reason to support float when we have double. Keep the API lean.
212
+ static_assert(
213
+ guts::false_t<T>::value,
214
+ "You tried to register a kernel with an unsupported input type: float. Please use double instead; you should use `double` in the C++ function signature and `float` in the schema string.");
215
+ };
216
+ template <class T, bool AllowDeprecatedTypes>
217
+ struct assert_is_valid_input_type<
218
+ T,
219
+ AllowDeprecatedTypes,
220
+ std::enable_if_t<std::is_same_v<const char*, T>>> {
221
+ static_assert(
222
+ guts::false_t<T>::value,
223
+ "You tried to register a kernel with an unsupported input type: const char*. Please use std::string_view instead.");
224
+ };
225
+ template <class T, bool AllowDeprecatedTypes>
226
+ struct assert_is_valid_input_type<
227
+ T,
228
+ AllowDeprecatedTypes,
229
+ std::enable_if_t<std::is_same_v<std::vector<bool>, T>>> {
230
+ static_assert(
231
+ guts::false_t<T>::value,
232
+ "You tried to register a kernel with an unsupported input type: vector<bool>. Please use List<bool> instead.");
233
+ };
234
+ template <class T, bool AllowDeprecatedTypes>
235
+ struct assert_is_valid_input_type<
236
+ T,
237
+ AllowDeprecatedTypes,
238
+ std::enable_if_t<
239
+ std::is_integral_v<T> &&
240
+ !guts::typelist::contains<supported_primitive_arg_types, T>::value>> {
241
+ static_assert(
242
+ guts::false_t<T>::value,
243
+ "You tried to register a kernel with an unsupported integral input type. Please use int64_t instead; you should use `int64_t` in the C++ function signature and `int` in the schema string.");
244
+ };
245
+ template <class T, bool AllowDeprecatedTypes>
246
+ struct assert_is_valid_input_type<
247
+ T,
248
+ AllowDeprecatedTypes,
249
+ std::enable_if_t<std::is_same_v<const c10::SymInt&, T>>> {
250
+ static_assert(
251
+ guts::false_t<T>::value,
252
+ "You tried to register a kernel taking c10::SymInt by reference. Please accept it by value instead.");
253
+ };
254
+
255
+ // TODO: it probably would be good to tighten this up quite a bit more with
256
+ // an explicit list for everything
257
+
258
+ //
259
+ // assert_is_valid_output_type
260
+ //
261
+
262
+ template <class T, bool AllowDeprecatedTypes, class Enable = void>
263
+ struct assert_is_valid_output_type {
264
+ assert_is_valid_output_type() {
265
+ if constexpr (guts::typelist::contains<supported_primitive_arg_types, T>::
266
+ value) {
267
+ /* everything is ok, this is a primitive type */
268
+ } else {
269
+ /* otherwise T is verified to be a registered custom class in the IValue
270
+ constructor, so no benefit in double-checking here */
271
+ }
272
+ }
273
+ };
274
+
275
+ template <class T, bool AllowDeprecatedTypes>
276
+ struct assert_is_valid_output_type<std::optional<T>, AllowDeprecatedTypes>
277
+ : assert_is_valid_output_type<T, AllowDeprecatedTypes> {};
278
+
279
+ template <class T, bool AllowDeprecatedTypes>
280
+ struct assert_is_valid_output_type<
281
+ c10::OptionalArrayRef<T>,
282
+ AllowDeprecatedTypes>
283
+ : assert_is_valid_output_type<T, AllowDeprecatedTypes> {};
284
+
285
+ template <class Key, class Value, bool AllowDeprecatedTypes>
286
+ struct assert_is_valid_output_type<Dict<Key, Value>, AllowDeprecatedTypes>
287
+ : assert_is_valid_output_type<Value, AllowDeprecatedTypes> {
288
+ static_assert(
289
+ guts::typelist::contains<impl::valid_dict_key_types, Key>::value,
290
+ "You tried to register a kernel with an unsupported output type: Dict<Key, Value> where Key is invalid. We only support int64_t, double, bool, and string.");
291
+ static_assert(
292
+ !std::is_same_v<Value, at::Scalar>,
293
+ "You tried to register a kernel with an unsupported output type: Dict<Key, Scalar>. Please use Dict<Key, int64_t> or Dict<Key, double>.");
294
+ };
295
+
296
+ template <class Key, class Value, bool AllowDeprecatedTypes>
297
+ struct assert_is_valid_output_type<
298
+ std::unordered_map<Key, Value>,
299
+ AllowDeprecatedTypes>
300
+ : assert_is_valid_output_type<Value, AllowDeprecatedTypes> {
301
+ static_assert(
302
+ AllowDeprecatedTypes,
303
+ "You tried to register a kernel with an unsupported output type: std::unordered_map<Key, Value>. Please use Dict<Key, Value> instead.");
304
+ static_assert(
305
+ guts::typelist::contains<impl::valid_dict_key_types, Key>::value,
306
+ "You tried to register a kernel with an unsupported output type: std::unordered_map<Key, Value> where Key is invalid. We only support int64_t, double, bool, and string.");
307
+ static_assert(
308
+ !std::is_same_v<Value, at::Scalar>,
309
+ "You tried to register a kernel with an unsupported output type: std::unordered_map<Key, Scalar>. Please use Dict<Key, int64_t> or Dict<Key, double>.");
310
+ };
311
+
312
+ template <class T, bool AllowDeprecatedTypes>
313
+ struct assert_is_valid_output_type<List<T>, AllowDeprecatedTypes>
314
+ : assert_is_valid_output_type<T, AllowDeprecatedTypes> {
315
+ static_assert(
316
+ !std::is_same_v<T, at::Scalar>,
317
+ "You tried to register a kernel with an unsupported output type: List<Scalar>. Please use List<int64_t>, List<double> or Tensor instead.");
318
+ };
319
+
320
+ template <class T, bool AllowDeprecatedTypes>
321
+ struct assert_is_valid_output_type<std::vector<T>, AllowDeprecatedTypes>
322
+ : assert_is_valid_output_type<T, AllowDeprecatedTypes> {
323
+ static_assert(
324
+ !std::is_same_v<T, at::Scalar>,
325
+ "You tried to register a kernel with an unsupported output type: std::vector<Scalar>. Please use List<int64_t>, List<double> or Tensor instead.");
326
+ // TODO static_assert(AllowDeprecatedTypes, "You tried to register a kernel
327
+ // with an unsupported output type: std::vector<T>. Please use List<T>
328
+ // instead.");
329
+ };
330
+
331
+ template <class T, size_t N, bool AllowDeprecatedTypes>
332
+ struct assert_is_valid_output_type<std::array<T, N>, AllowDeprecatedTypes>
333
+ : assert_is_valid_output_type<T, AllowDeprecatedTypes> {
334
+ static_assert(
335
+ !std::is_same_v<T, at::Scalar>,
336
+ "You tried to register a kernel with an unsupported output type: std::array<Scalar, N>. Please use std::array<int64_t, N> instead.");
337
+ };
338
+
339
+ // The following specialisations of assert_is_valid_output_type are technically
340
+ // not necessary since we would hit the base case and show an error message
341
+ // there if they didn't exist, but we can show a better error message
342
+ // in some common error scenarios.
343
+ template <class T, bool AllowDeprecatedTypes>
344
+ struct assert_is_valid_output_type<
345
+ T,
346
+ AllowDeprecatedTypes,
347
+ std::enable_if_t<std::is_same_v<float, T>>> {
348
+ // There is no reason to support float when we have double. Keep the API lean.
349
+ static_assert(
350
+ guts::false_t<T>::value,
351
+ "You tried to register a kernel with an unsupported output type: float. Please use double instead; you should use `double` in the C++ function signature and `float` in the schema string.");
352
+ };
353
+ template <class T, bool AllowDeprecatedTypes>
354
+ struct assert_is_valid_output_type<
355
+ T,
356
+ AllowDeprecatedTypes,
357
+ std::enable_if_t<std::is_same_v<const char*, T>>> {
358
+ static_assert(
359
+ guts::false_t<T>::value,
360
+ "You tried to register a kernel with an unsupported output type: const char*. Please use std::string_view instead.");
361
+ };
362
+ template <class T, bool AllowDeprecatedTypes>
363
+ struct assert_is_valid_output_type<
364
+ T,
365
+ AllowDeprecatedTypes,
366
+ std::enable_if_t<std::is_same_v<std::vector<bool>, T>>> {
367
+ static_assert(
368
+ guts::false_t<T>::value,
369
+ "You tried to register a kernel with an unsupported output type: vector<bool>. Please use List<bool> instead.");
370
+ };
371
+ template <class T, bool AllowDeprecatedTypes>
372
+ struct assert_is_valid_output_type<
373
+ T,
374
+ AllowDeprecatedTypes,
375
+ std::enable_if_t<
376
+ std::is_integral_v<T> &&
377
+ !guts::typelist::contains<supported_primitive_arg_types, T>::value>> {
378
+ static_assert(
379
+ guts::false_t<T>::value,
380
+ "You tried to register a kernel with an unsupported integral output type. Please use int64_t instead; you should use `int64_t` in the C++ function signature and `int` in the schema string.");
381
+ };
382
+
383
+ // ivalue_to_arg
384
+
385
+ template <class T>
386
+ struct decay_if_not_tensor final {
387
+ using type = std::decay_t<T>;
388
+ };
389
+
390
+ template <>
391
+ struct decay_if_not_tensor<at::Tensor&> final {
392
+ using type = at::Tensor&;
393
+ };
394
+
395
+ template <>
396
+ struct decay_if_not_tensor<const at::Tensor&> final {
397
+ using type = const at::Tensor&;
398
+ };
399
+
400
+ template <class T, bool AllowDeprecatedTypes>
401
+ struct ivalue_to_arg final {
402
+ static decltype(auto) call(IValue& v) {
403
+ assert_is_valid_input_type<T, AllowDeprecatedTypes>();
404
+ return std::move(v).to<T>();
405
+ }
406
+ };
407
+
408
+ // The following two specializations take advantage of specialized
409
+ // `toTensor()` overloads on IValue to avoid copying.
410
+ template <bool AllowDeprecatedTypes>
411
+ struct ivalue_to_arg<at::Tensor&, AllowDeprecatedTypes> final {
412
+ // We cannot use the default implementation if they asked for a
413
+ // `at::Tensor&` because it moves from the IValue, so it can't get
414
+ // an lvalue reference.
415
+ static at::Tensor& call(IValue& v) {
416
+ // Tensor& is valid, don't bother asserting
417
+ return v.toTensor();
418
+ }
419
+ };
420
+
421
+ template <bool AllowDeprecatedTypes>
422
+ struct ivalue_to_arg<const at::Tensor&, AllowDeprecatedTypes> final {
423
+ // We should not use the default implementation if they asked for
424
+ // a `const at::Tensor&` because it moves from the IValue and they
425
+ // didn't ask for that.
426
+ static const at::Tensor& call(IValue& v) {
427
+ // const Tensor& is valid, don't bother asserting
428
+ return v.toTensor();
429
+ }
430
+ };
431
+
432
+ template <bool AllowDeprecatedTypes>
433
+ struct ivalue_to_arg<at::ITensorListRef, AllowDeprecatedTypes> final {
434
+ static List<at::Tensor> call(IValue& v) {
435
+ return v.toTensorList();
436
+ }
437
+ };
438
+
439
+ template <class T, bool AllowDeprecatedTypes>
440
+ struct ivalue_to_arg<ArrayRef<T>, AllowDeprecatedTypes> final {
441
+ // If an argument is ArrayRef<T>, convert the IValue to a std::vector<T> and
442
+ // pass that to the operator. std::vector<T> is implicitly convertible to
443
+ // ArrayRef<T>.
444
+ static std::vector<T> call(IValue& v) {
445
+ return ivalue_to_arg<std::vector<T>, AllowDeprecatedTypes>::call(v);
446
+ }
447
+ };
448
+ template <bool AllowDeprecatedTypes>
449
+ struct ivalue_to_arg<c10::SymIntArrayRef, AllowDeprecatedTypes> final {
450
+ static std::vector<c10::SymInt> call(IValue& v) {
451
+ if (v.isIntList()) {
452
+ std::vector<c10::SymInt> r;
453
+ auto src = v.toIntList();
454
+ std::transform(
+ src.begin(), src.end(), std::back_inserter(r), [](int64_t i) {
+ return c10::SymInt(i);
+ });
+ return r;
+ } else {
+ return ivalue_to_arg<std::vector<c10::SymInt>, AllowDeprecatedTypes>::
+ call(v);
+ }
+ }
+ };
+ template <bool AllowDeprecatedTypes>
+ struct ivalue_to_arg<c10::OptionalArray<c10::SymInt>, AllowDeprecatedTypes>
+ final {
+ static OptionalArray<c10::SymInt> call(IValue& v) {
+ if (v.isIntList()) {
+ std::vector<c10::SymInt> r;
+ auto src = v.toIntList();
+ std::transform(
+ src.begin(), src.end(), std::back_inserter(r), [](int64_t i) {
+ return c10::SymInt(i);
+ });
+ return OptionalArray<c10::SymInt>(std::move(r));
+ } else {
+ return std::move(v).to<OptionalArray<c10::SymInt>>();
+ }
+ }
+ };
+ template <class T, bool AllowDeprecatedTypes>
+ struct ivalue_to_arg<std::optional<ArrayRef<T>>, AllowDeprecatedTypes> final {
+ // If an argument is std::optional<ArrayRef<T>>, convert the IValue to an
+ // std::optional<std::vector<T>> and pass that to the operator.
+ // OptionalArray<T> is basically a std::optional<std::vector<T>> but
+ // implicitly convertible to std::optional<ArrayRef<T>>.
+ static OptionalArray<T> call(IValue& v) {
+ return ivalue_to_arg<OptionalArray<T>, AllowDeprecatedTypes>::call(v);
+ }
+ };
+
+ template <class T, bool AllowDeprecatedTypes>
+ struct ivalue_to_arg<OptionalArrayRef<T>, AllowDeprecatedTypes> final {
+ // If an argument is OptionalArrayRef<T>, convert the IValue to an
+ // std::optional<std::vector<T>> and pass that to the operator.
+ // OptionalArray<T> is basically a std::optional<std::vector<T>> but
+ // implicitly convertible to OptionalArrayRef<T>
+ static OptionalArray<T> call(IValue& v) {
+ return ivalue_to_arg<OptionalArray<T>, AllowDeprecatedTypes>::call(v);
+ }
+ };
+
+ // return_to_ivalue
+ template <class T, bool AllowDeprecatedTypes, class Enable = void>
+ struct return_to_ivalue final {};
+
+ template <class T, bool AllowDeprecatedTypes>
+ struct return_to_ivalue<
+ T,
+ AllowDeprecatedTypes,
+ std::enable_if_t<!std::is_same_v<at::Tensor&, T>>>
+ final {
+ static IValue call(T&& v) {
+ assert_is_valid_output_type<T, AllowDeprecatedTypes>();
+ return c10::ivalue::from(std::move(v));
+ }
+ static IValue copy(const T& v) {
+ assert_is_valid_output_type<T, AllowDeprecatedTypes>();
+ return IValue(v);
+ }
+ };
+
+ // Special case to allow kernels to return `Tensor&`.
+ // TODO Delete this once kernels don't do that anymore
+ template <bool AllowDeprecatedTypes>
+ struct return_to_ivalue<at::Tensor&, AllowDeprecatedTypes, void> final {
+ static IValue call(at::Tensor& v) {
+ return c10::ivalue::from(v);
+ }
+ static IValue copy(at::Tensor& v) {
+ return IValue(v);
+ }
+ };
+
+ // wrap_kernel_functor_unboxed_
+
+ template <class KernelFunctor, class OpSignature>
+ struct wrap_kernel_functor_unboxed_ final {};
+
+ // This specialization is for kernels with a first argument that is NOT of type
+ // DispatchKeySet This includes kernels with 0 arguments.
+ template <class KernelFunctor, class ReturnType, class... ParameterTypes>
+ struct wrap_kernel_functor_unboxed_<
+ KernelFunctor,
+ ReturnType(ParameterTypes...)>
+ final {
+ static_assert(
+ std::is_same_v<
+ ReturnType,
+ typename guts::infer_function_traits_t<KernelFunctor>::return_type>,
+ "Return type mismatch");
+ static_assert(
+ std::is_same_v<
+ guts::typelist::typelist<ParameterTypes...>,
+ typename guts::infer_function_traits_t<
+ KernelFunctor>::parameter_types>,
+ "Parameter types mismatch");
+
+ // See [Note: Argument forwarding in the dispatcher] for why ParameterTypes
+ // doesn't use &&
+ static ReturnType call(
+ OperatorKernel* functor,
+ DispatchKeySet,
+ ParameterTypes... args) {
+ KernelFunctor* functor_ = static_cast<KernelFunctor*>(functor);
+ // Note [Plumbing Keys Through The Dispatcher 2]
+ // See Note [Plumbing Keys Through The Dispatcher] for the background.
+ // This functor explicitly takes in a dispatchKeySet and drops it on the
+ // floor- it does not forward it to the registered kernel.
+ //
+ // This is due to the calling convention within the dispatcher, which
+ // expects all registered kernels to have a first argument of type
+ // DispatchKeySet.
+ // This is not the case for pretty much all manually written kernels,
+ // however- this functor serves to separate the calling convention of the
+ // dispatcher from the calling convention of manually written kernels.
+ return (*functor_)(std::forward<ParameterTypes>(args)...);
+ }
+ };
+
+ // This specialization is for kernels with a first argument of type
+ // DispatchKeySet
+ template <class KernelFunctor, class ReturnType, class... ParameterTypes>
+ struct wrap_kernel_functor_unboxed_<
+ KernelFunctor,
+ ReturnType(DispatchKeySet, ParameterTypes...)>
+ final {
+ static_assert(
+ std::is_same_v<
+ ReturnType,
+ typename guts::infer_function_traits_t<KernelFunctor>::return_type>,
+ "Return type mismatch");
+ static_assert(
+ std::is_same_v<
+ guts::typelist::typelist<DispatchKeySet, ParameterTypes...>,
+ typename guts::infer_function_traits_t<
+ KernelFunctor>::parameter_types>,
+ "Parameter types mismatch");
+
+ // See [Note: Argument forwarding in the dispatcher] for why ParameterTypes
+ // doesn't use &&
+ static ReturnType call(
+ OperatorKernel* functor,
+ DispatchKeySet dispatchKeySet,
+ ParameterTypes... args) {
+ KernelFunctor* functor_ = static_cast<KernelFunctor*>(functor);
+ // We're explicitly taking in a dispatchKeySet and forwarding it to the
+ // registered kernel. See Note [Plumbing Keys Through The Dispatcher 2] for
+ // details.
+ return (*functor_)(dispatchKeySet, std::forward<ParameterTypes>(args)...);
+ }
+ };
+
+ template <class KernelFunctor>
+ using wrap_kernel_functor_unboxed = wrap_kernel_functor_unboxed_<
+ KernelFunctor,
+ typename guts::infer_function_traits_t<KernelFunctor>::func_type>;
+
+ // call_functor_with_args_from_stack
+
+ template <
+ class Functor,
+ bool AllowDeprecatedTypes,
+ size_t... ivalue_arg_indices,
+ typename... ArgTypes>
+ std::decay_t<typename guts::infer_function_traits_t<Functor>::return_type>
+ call_functor_with_args_from_stack_(
+ OperatorKernel* functor,
+ DispatchKeySet dispatchKeySet,
+ Stack* stack,
+ std::index_sequence<ivalue_arg_indices...>,
+ guts::typelist::typelist<ArgTypes...>*) {
+ (void)(stack); // when sizeof...(ivalue_arg_indices) == 0, this argument would
+ // be unused and we have to silence the compiler warning.
+
+ // We're explicitly filtering out DispatchKeySet from the argument list.
+ // Some kernels take a DispatchKeySet as their first argument in order to
+ // plumb keys through the dispatcher. We don't want to expose the
+ // DispatchKeySet type to jit, so we don't include this argument on the stack.
+ // See Note [Plumbing Keys Through The Dispatcher] for the background.
+ return wrap_kernel_functor_unboxed<Functor>::call(
+ functor,
+ dispatchKeySet,
+ ivalue_to_arg<
+ typename decay_if_not_tensor<ArgTypes>::type,
+ AllowDeprecatedTypes>::
+ call(torch::jit::peek(
+ *stack, ivalue_arg_indices, sizeof...(ivalue_arg_indices)))...);
+ }
+
+ template <class Functor, bool AllowDeprecatedTypes>
+ std::decay_t<typename guts::infer_function_traits_t<Functor>::return_type>
+ call_functor_with_args_from_stack(
+ OperatorKernel* functor,
+ DispatchKeySet dispatchKeySet,
+ Stack* stack) {
+ // We're explicitly filtering out DispatchKeySet from the argument list.
+ // Some kernels take a DispatchKeySet as their first argument in order to
+ // plumb keys through the dispatcher. We don't want to expose the
+ // DispatchKeySet type to jit, so we don't include this argument on the stack.
+ // See Note [Plumbing Keys Through The Dispatcher] for the background.
+ using ArgTypes = typename c10::remove_DispatchKeySet_arg_from_func<
+ Functor>::parameter_types;
+ constexpr size_t num_ivalue_args = guts::typelist::size<ArgTypes>::value;
+ return call_functor_with_args_from_stack_<Functor, AllowDeprecatedTypes>(
+ functor,
+ dispatchKeySet,
+ stack,
+ std::make_index_sequence<num_ivalue_args>(),
+ static_cast<ArgTypes*>(nullptr));
+ }
+
+ // push_outputs
+
+ template <class OutputType, bool AllowDeprecatedTypes>
+ struct push_outputs final {
+ // Contrary to [Note: Argument forwarding in the dispatcher], we use
+ // OutputType&& here to avoid one extra call to the move constructor in this
+ // case. This is still not a universal reference though because OutputType is
+ // an explicitly specified class template parameter.
+ static void call(OutputType&& output, Stack* stack) {
+ torch::jit::push(
+ *stack,
+ return_to_ivalue<OutputType, AllowDeprecatedTypes>::call(
+ std::forward<OutputType>(output)));
+ }
+ static void copy(const OutputType& output, Stack* stack) {
+ torch::jit::push(
+ *stack,
+ return_to_ivalue<OutputType, AllowDeprecatedTypes>::copy(output));
+ }
+ };
+ template <class... OutputTypes, bool AllowDeprecatedTypes>
+ struct push_outputs<std::tuple<OutputTypes...>, AllowDeprecatedTypes> final {
+ static void call(std::tuple<OutputTypes...>&& output, Stack* stack) {
+ call_(
+ std::move(output),
+ stack,
+ std::make_index_sequence<sizeof...(OutputTypes)>());
+ }
+ static void copy(const std::tuple<OutputTypes...>& output, Stack* stack) {
+ copy_(output, stack, std::make_index_sequence<sizeof...(OutputTypes)>());
+ }
+
+ private:
+ template <size_t... indices>
+ static void call_(
+ std::tuple<OutputTypes...>&& output,
+ Stack* stack,
+ std::index_sequence<indices...>) {
+ torch::jit::push(
+ *stack,
+ return_to_ivalue<OutputTypes, AllowDeprecatedTypes>::call(
+ std::forward<OutputTypes>(std::get<indices>(output)))...);
+ }
+ template <size_t... indices>
+ static void copy_(
+ const std::tuple<OutputTypes...>& output,
+ Stack* stack,
+ std::index_sequence<indices...>) {
+ torch::jit::push(
+ *stack,
+ return_to_ivalue<OutputTypes, AllowDeprecatedTypes>::copy(
+ std::get<indices>(output))...);
+ }
+ };
+ template <bool AllowDeprecatedTypes>
+ struct push_outputs<void, AllowDeprecatedTypes> final {
+ static void call(int /*dummy*/, Stack* /*stack*/) {}
+ static void copy(int /*dummy*/, Stack* /*stack*/) {}
+ };
+
+ // make_boxed_from_unboxed_functor
+
+ template <class KernelFunctor, bool AllowDeprecatedTypes>
+ struct make_boxed_from_unboxed_functor final {
+ static_assert(
+ std::is_base_of_v<OperatorKernel, KernelFunctor>,
+ "Tried to register a kernel functor using the kernel<Functor>() API, but it doesn't inherit from c10::OperatorKernel. Please have the functor inherit from it.");
+
+ static void call(
+ OperatorKernel* functor,
+ const OperatorHandle&,
+ DispatchKeySet dispatchKeySet,
+ Stack* stack) {
+ using ReturnType =
+ typename guts::infer_function_traits_t<KernelFunctor>::return_type;
+ // We're explicitly filtering out DispatchKeySet from the argument list.
+ // Some kernels take a DispatchKeySet as their first argument in order to
+ // plumb keys through the dispatcher. We don't want to expose the
+ // DispatchKeySet type to jit, so we don't include this argument on the
+ // stack. See Note [Plumbing Keys Through The Dispatcher] for the
+ // background.
+ using ArgTypes = typename c10::remove_DispatchKeySet_arg_from_func<
+ KernelFunctor>::parameter_types;
+ constexpr bool has_outputs = !std::is_same_v<void, ReturnType>;
+ constexpr size_t num_inputs = guts::typelist::size<ArgTypes>::value;
+ if constexpr (has_outputs) {
+ // Decay ReturnType to ReturnType_ so that if a reference gets returned,
+ // we actually store it by value and don't get a dangling reference. This
+ // is only required because some kernels still return `Tensor&`. [Note:
+ // VC++ and 'std': ambiguous symbol]
+ using ReturnType_ = ::std::decay_t<ReturnType>;
+ ReturnType_ output = call_functor_with_args_from_stack<
+ KernelFunctor,
+ AllowDeprecatedTypes>(functor, dispatchKeySet, stack);
+ torch::jit::drop(*stack, num_inputs);
+ // See note [ VC++ and 'std': ambiguous symbol]
+ push_outputs<ReturnType_, AllowDeprecatedTypes>::call(
+ ::std::move(output), stack);
+ } else {
+ call_functor_with_args_from_stack<KernelFunctor, AllowDeprecatedTypes>(
+ functor, dispatchKeySet, stack);
+ torch::jit::drop(*stack, num_inputs);
+ }
+ }
+ };
+ } // namespace impl
+
+ } // namespace c10
+
+ namespace torch {
+ using OperatorKernel = c10::OperatorKernel;
+ }
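
Side note (illustrative, not part of the uploaded header): the machinery above is what lets a hand-written functor be driven through the boxed, stack-based calling convention. A minimal sketch of such a kernel, registered via the legacy kernel<Functor>() API that the static_assert message refers to; the operator name "myops::my_add" and the functor itself are hypothetical:

#include <ATen/core/Tensor.h>
#include <ATen/core/op_registration/op_registration.h>

namespace {
// An unboxed kernel functor: it must inherit from c10::OperatorKernel.
// make_boxed_from_unboxed_functor::call() pops the IValue arguments off the
// stack via ivalue_to_arg, invokes operator(), and pushes the result back
// via push_outputs.
struct MyAddKernel final : c10::OperatorKernel {
  at::Tensor operator()(const at::Tensor& a, const at::Tensor& b) {
    return a.add(b);
  }
};

// Hypothetical schema; registration through the legacy c10 API.
static auto registry = c10::RegisterOperators().op(
    "myops::my_add(Tensor a, Tensor b) -> Tensor",
    c10::RegisterOperators::options().kernel<MyAddKernel>(
        c10::DispatchKey::CPU));
} // namespace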
phivenv/Lib/site-packages/torch/include/ATen/core/boxing/impl/test_helpers.h ADDED
@@ -0,0 +1,140 @@
+ #pragma once
+
+ #include <gmock/gmock.h>
+ #include <gtest/gtest.h>
+
+ #include <ATen/core/Tensor.h>
+ #include <ATen/core/dispatch/Dispatcher.h>
+ #include <ATen/core/ivalue.h>
+ #include <c10/core/CPUAllocator.h>
+ #include <c10/util/irange.h>
+
+ template <class... Inputs>
+ inline std::vector<c10::IValue> makeStack(Inputs&&... inputs) {
+ return {std::forward<Inputs>(inputs)...};
+ }
+
+ inline at::Tensor dummyTensor(
+ c10::DispatchKeySet ks,
+ bool requires_grad = false) {
+ auto* allocator = c10::GetCPUAllocator();
+ int64_t nelements = 1;
+ auto dtype = caffe2::TypeMeta::Make<float>();
+ int64_t size_bytes = nelements * dtype.itemsize();
+ auto storage_impl = c10::make_intrusive<c10::StorageImpl>(
+ c10::StorageImpl::use_byte_size_t(),
+ size_bytes,
+ allocator->allocate(size_bytes),
+ allocator,
+ /*resizable=*/true);
+ at::Tensor t =
+ at::detail::make_tensor<c10::TensorImpl>(storage_impl, ks, dtype);
+ // TODO: We add this to simulate the ideal case where we only have Autograd
+ // backend keys
+ // on Tensor when it requires grad. But currently Autograd keys are
+ // added in TensorImpl constructor by default.
+ if (!requires_grad) {
+ t.unsafeGetTensorImpl()->remove_autograd_key();
+ }
+ return t;
+ }
+
+ inline at::Tensor dummyTensor(
+ c10::DispatchKey dispatch_key,
+ bool requires_grad = false) {
+ return dummyTensor(c10::DispatchKeySet(dispatch_key), requires_grad);
+ }
+
+ template <class... Args>
+ inline std::vector<c10::IValue> callOp(
+ const c10::OperatorHandle& op,
+ Args... args) {
+ auto stack = makeStack(std::forward<Args>(args)...);
+ op.callBoxed(&stack);
+ return stack;
+ }
+
+ template <class Result, class... Args>
+ inline Result callOpUnboxed(const c10::OperatorHandle& op, Args... args) {
+ return op.typed<Result(Args...)>().call(std::forward<Args>(args)...);
+ }
+
+ template <class Result, class... Args>
+ inline Result callOpUnboxedWithDispatchKey(
+ const c10::OperatorHandle& op,
+ c10::DispatchKey dispatchKey,
+ Args... args) {
+ return op.typed<Result(Args...)>().callWithDispatchKey(
+ dispatchKey, std::forward<Args>(args)...);
+ }
+
+ template <class Result, class... Args>
+ inline Result callOpUnboxedWithPrecomputedDispatchKeySet(
+ const c10::OperatorHandle& op,
+ c10::DispatchKeySet ks,
+ Args... args) {
+ return op.typed<Result(Args...)>().redispatch(
+ ks, std::forward<Args>(args)...);
+ }
+
+ inline void expectDoesntFindKernel(
+ const char* op_name,
+ c10::DispatchKey dispatch_key) {
+ auto op = c10::Dispatcher::singleton().findSchema({op_name, ""});
+ EXPECT_ANY_THROW(callOp(*op, dummyTensor(dispatch_key), 5););
+ }
+
+ inline void expectDoesntFindOperator(const char* op_name) {
+ auto op = c10::Dispatcher::singleton().findSchema({op_name, ""});
+ EXPECT_FALSE(op.has_value());
+ }
+
+ template <class Exception, class Functor>
+ inline void expectThrows(Functor&& functor, const char* expectMessageContains) {
+ try {
+ std::forward<Functor>(functor)();
+ } catch (const Exception& e) {
+ EXPECT_THAT(e.what(), testing::HasSubstr(expectMessageContains));
+ return;
+ }
+ ADD_FAILURE() << "Expected to throw exception containing \""
+ << expectMessageContains << "\" but didn't throw";
+ }
+
+ template <class T, size_t N>
+ void expectListEquals(c10::ArrayRef<T> expected, std::array<T, N> actual) {
+ EXPECT_EQ(expected.size(), actual.size());
+ for (const auto i : c10::irange(expected.size())) {
+ EXPECT_EQ(expected[i], actual[i]);
+ }
+ }
+
+ template <class T>
+ void expectListEquals(c10::ArrayRef<T> expected, c10::ArrayRef<T> actual) {
+ EXPECT_EQ(expected.size(), actual.size());
+ for (const auto i : c10::irange(expected.size())) {
+ EXPECT_EQ(expected[i], actual[i]);
+ }
+ }
+
+ template <class T>
+ void expectListEquals(c10::ArrayRef<T> expected, c10::List<T> actual) {
+ EXPECT_EQ(expected.size(), actual.size());
+ for (const auto i : c10::irange(expected.size())) {
+ EXPECT_EQ(expected[i], actual.get(i));
+ }
+ }
+
+ template <class T>
+ void expectListEquals(c10::ArrayRef<T> expected, std::vector<T> actual) {
+ EXPECT_EQ(expected.size(), actual.size());
+ for (const auto i : c10::irange(expected.size())) {
+ EXPECT_EQ(expected[i], actual[i]);
+ }
+ }
+
+ // NB: This is not really sound, but all of the type sets constructed here
+ // are singletons so it's fine
+ static inline c10::DispatchKey extractDispatchKey(const at::Tensor& t) {
+ return legacyExtractDispatchKey(t.key_set());
+ }
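
Illustrative usage (not part of the uploaded header): these helpers are meant to be called from gtest test bodies. A sketch, assuming an operator under the hypothetical name "_test::dummy(Tensor t, int a) -> Tensor" has already been registered with a CPU kernel:

TEST(OperatorRegistrationTest, callsRegisteredBoxedKernel) {
  auto op = c10::Dispatcher::singleton().findSchema({"_test::dummy", ""});
  ASSERT_TRUE(op.has_value());
  // callOp packs the arguments into an IValue stack via makeStack, runs the
  // boxed kernel, and hands back the stack now holding the outputs.
  auto outputs = callOp(*op, dummyTensor(c10::DispatchKey::CPU), 4);
  ASSERT_EQ(1, outputs.size());
  EXPECT_EQ(c10::DispatchKey::CPU, extractDispatchKey(outputs[0].toTensor()));
}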
phivenv/Lib/site-packages/torch/include/ATen/core/dispatch/CppSignature.h ADDED
@@ -0,0 +1,67 @@
+ #pragma once
+
+ #include <c10/core/DispatchKeySet.h>
+ #include <c10/macros/Macros.h>
+ #include <c10/util/Metaprogramming.h>
+ #include <c10/util/Type.h>
+ #include <typeindex>
+
+ namespace c10::impl {
+
+ // A CppSignature object holds RTTI information about a C++ function signature
+ // at runtime and can compare them or get a debug-printable name.
+ class TORCH_API CppSignature final {
+ public:
+ CppSignature(const CppSignature&) = default;
+ CppSignature(CppSignature&&) noexcept = default;
+ CppSignature& operator=(const CppSignature&) = default;
+ CppSignature& operator=(CppSignature&&) noexcept = default;
+
+ template <class FuncType>
+ static CppSignature make() {
+ // Normalize functors, lambdas, function pointers, etc. into the plain
+ // function type The first argument of the schema might be of type
+ // DispatchKeySet, in which case we remove it. We do this to guarantee that
+ // all CppSignature's for an operator will match, even if they're registered
+ // with different calling conventions.
+ // See Note [Plumbing Keys Through The Dispatcher]
+ using decayed_function_type =
+ typename c10::remove_DispatchKeySet_arg_from_func<
+ std::decay_t<FuncType>>::func_type;
+
+ return CppSignature(std::type_index(typeid(decayed_function_type)));
+ }
+
+ std::string name() const {
+ return c10::demangle(signature_.name());
+ }
+
+ friend bool operator==(const CppSignature& lhs, const CppSignature& rhs) {
+ if (lhs.signature_ == rhs.signature_) {
+ return true;
+ }
+ // Without RTLD_GLOBAL, the type_index comparison could yield false because
+ // they point to different instances of the RTTI data, but the types would
+ // still be the same. Let's check for that case too.
+ // Note that there still is a case where this might not work, i.e. when
+ // linking libraries of different compilers together, they might have
+ // different ways to serialize a type name. That, together with a missing
+ // RTLD_GLOBAL, would still fail this.
+ if (0 == strcmp(lhs.signature_.name(), rhs.signature_.name())) {
+ return true;
+ }
+
+ return false;
+ }
+
+ private:
+ explicit CppSignature(std::type_index signature)
+ : signature_(std::move(signature)) {}
+ std::type_index signature_;
+ };
+
+ inline bool operator!=(const CppSignature& lhs, const CppSignature& rhs) {
+ return !(lhs == rhs);
+ }
+
+ } // namespace c10::impl
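
Illustrative sketch (not part of the uploaded header): because make() strips a leading DispatchKeySet parameter before taking the type_index, two kernels that differ only in that plumbing argument should compare as having the same C++ signature. The kernel names below are hypothetical:

#include <ATen/core/Tensor.h>
#include <ATen/core/dispatch/CppSignature.h>

at::Tensor kernel_plain(const at::Tensor& t);
at::Tensor kernel_with_keys(c10::DispatchKeySet ks, const at::Tensor& t);

void signature_check() {
  auto a = c10::impl::CppSignature::make<decltype(kernel_plain)>();
  auto b = c10::impl::CppSignature::make<decltype(kernel_with_keys)>();
  TORCH_INTERNAL_ASSERT(a == b);            // DispatchKeySet argument removed
  TORCH_INTERNAL_ASSERT(!a.name().empty()); // demangled, printable name
}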
phivenv/Lib/site-packages/torch/include/ATen/core/dispatch/DispatchKeyExtractor.h ADDED
@@ -0,0 +1,279 @@
+ #pragma once
+
+ #include <ATen/core/Variadic.h>
+ #include <ATen/core/function_schema.h>
+ #include <ATen/core/jit_type.h>
+ #include <ATen/core/stack.h>
+ #include <c10/core/DispatchKeySet.h>
+ #include <c10/util/Bitset.h>
+ #include <c10/util/irange.h>
+ #include <cstdint>
+
+ namespace c10 {
+
+ namespace impl {
+
+ // Take a DispatchKeySet for a Tensor and determine what the actual dispatch
+ // DispatchKey should be, taking into account TLS, and skipping backends which
+ // fall through.
+ //
+ // Unlike Tensor::key_set(), the value of this on a tensor can change depending
+ // on TLS.
+ //
+ // NB: If there is no valid dispatch key, this will return Undefined
+ inline DispatchKeySet computeDispatchKeySet(
+ DispatchKeySet ks,
+ // The key mask lets us eliminate (by zero entries) keys which should not
+ // be considered for dispatch. There are two cases when we use this:
+ //
+ // - If an operator's dispatch table contains a fallthrough entry, we
+ // should bypass it entirely when finding the key
+ // - If a user invokes with redispatch, the mask lets us
+ // zero out the key the user asked us to stop.
+ //
+ // These excluded backends are NOT tracked in the TLS, but must be applied
+ // AFTER TLS (since the backend may have been introduced for consideration
+ // by the included TLS), which is why you have to pass them in to this
+ // function (as opposed to just applying it to the input 'ks').
+ DispatchKeySet key_mask) {
+ c10::impl::LocalDispatchKeySet local =
+ c10::impl::tls_local_dispatch_key_set();
+ // TODO: It's a bit irritating that we have to do logical ORs here, it would
+ // be nice to only do one. Can always_included be folded into the TLS? Well,
+ // it's a bit troublesome, because fastpath TLS access requires the type of
+ // the TLS in question to be zero-initialized, so you don't actually win
+ // anything in that case.
+ return (((ks | local.included_) - local.excluded_) & key_mask);
+ }
+
+ } // namespace impl
+
+ namespace detail {
+ // A small gadget to extract the DispatchKeySet from types which are known
+ // to have it. Used to extract dispatch keys from unboxed calls.
+ struct MultiDispatchKeySet : at::IterArgs<MultiDispatchKeySet> {
+ DispatchKeySet ts;
+ void operator()(const at::Tensor& x) {
+ ts = ts | x.key_set();
+ }
+ void operator()(const std::optional<at::Tensor>& x) {
+ if (x.has_value()) {
+ ts = ts | x->key_set();
+ }
+ }
+ void operator()(at::ArrayRef<at::Tensor> xs) {
+ for (const auto& x : xs) {
+ ts = ts | x.key_set();
+ }
+ }
+ // Tensor?[] translates to this case.
+ void operator()(const c10::List<std::optional<at::Tensor>>& xs) {
+ for (std::optional<at::Tensor> x : xs) {
+ if (x.has_value()) {
+ ts = ts | x.value().key_set();
+ }
+ }
+ }
+ // Structured Tensor[] translates to this case
+ void operator()(const at::ITensorListRef& xs) {
+ for (const auto& x : xs) {
+ ts = ts | x.key_set();
+ }
+ }
+ [[noreturn]] void operator()(at::ArrayRef<std::optional<at::Tensor>>) {
+ // Just checking that the handling of Tensor?[] didn't change.
+ TORCH_INTERNAL_ASSERT(false);
+ }
+ void operator()(const at::Generator& gen) {
+ if (gen.defined()) {
+ ts = ts | gen.key_set();
+ }
+ }
+ void operator()(const std::optional<at::Generator>& gen) {
+ if (gen.has_value() && gen->defined()) {
+ ts = ts | gen->key_set();
+ }
+ }
+ template <typename T>
+ void operator()(const T&) {
+ // do nothing
+ }
+ };
+
+ // NB: take by const reference (Don't do universal forwarding here! You
+ // don't want to move into this function!)
+ template <typename... Args>
+ DispatchKeySet multi_dispatch_key_set(const Args&... args) {
+ return MultiDispatchKeySet().apply(args...).ts;
+ }
+ } // namespace detail
+
+ /**
+ * An instance of DispatchKeyExtractor knows how to get a dispatch key given
+ * a list of arguments for an operator call.
+ *
+ * The instance is specific for a certain operator as:
+ * - In boxed dispatch, different operators have different ways to extract
+ * the dispatch key (e.g. different numbers of arguments), and we precompute
+ * the stack locations we should look at; and
+ * - In all dispatch, some backends should be excluded from dispatch because
+ * they have been registered as fallthrough. The set of excluded backends
+ * varies from operator, as some operators may have overridden the
+ * fallthrough with custom behavior.
+ *
+ * Note - this should maintain identical impl to the py dispatcher key
+ * extraction logic at pytorch/torch/dispatcher.py
+ */
+ struct TORCH_API DispatchKeyExtractor final {
+ public:
+ static DispatchKeyExtractor make(const FunctionSchema& schema) {
+ return DispatchKeyExtractor(makeBitsetForDispatchArgs(schema));
+ }
+
+ static DispatchKeyExtractor makeUninitialized() {
+ return DispatchKeyExtractor(c10::utils::bitset());
+ }
+
+ void registerSchema(const FunctionSchema& schema) {
+ TORCH_INTERNAL_ASSERT(dispatch_arg_indices_reverse_.is_entirely_unset());
+ dispatch_arg_indices_reverse_ = makeBitsetForDispatchArgs(schema);
+ }
+ void deregisterSchema() {
+ dispatch_arg_indices_reverse_ = c10::utils::bitset();
+ }
+
+ DispatchKeySet getDispatchKeySetBoxed(const torch::jit::Stack* stack) const {
+ DispatchKeySet ks;
+ dispatch_arg_indices_reverse_.for_each_set_bit([&](size_t
+ reverse_arg_index) {
+ const auto& ivalue = torch::jit::peek(*stack, 0, reverse_arg_index + 1);
+ if (C10_LIKELY(ivalue.isTensor())) {
+ // NB: Take care not to introduce a refcount bump (there's
+ // no safe toTensorRef method, alas)
+ ks = ks | ivalue.unsafeToTensorImpl()->key_set();
+ } else if (C10_UNLIKELY(ivalue.isTensorList())) {
+ // NB: use toListRef as it doesn't induce refcount bumps
+ // (toTensorListRef is not a thing)
+ for (const auto& nv : ivalue.toListRef()) {
+ auto* tensor = nv.unsafeToTensorImpl();
+ ks = ks | tensor->key_set();
+ }
+ }
+ // Tensor?[] translates to a c10::List<IValue> so we need to peek inside
+ else if (C10_UNLIKELY(ivalue.isList())) {
+ for (const auto& elt : ivalue.toListRef()) {
+ if (elt.isTensor()) {
+ ks = ks | elt.toTensor().key_set();
+ }
+ }
+ }
+ });
+ // Keys that are fallthrough should be skipped
+ if (requiresBitsetPerBackend_) {
+ c10::impl::LocalDispatchKeySet tls =
+ c10::impl::tls_local_dispatch_key_set();
+ auto backend_idx =
+ ((ks | tls.included_) - tls.excluded_).getBackendIndex();
+ return impl::computeDispatchKeySet(
+ ks, nonFallthroughKeysPerBackend_[backend_idx]);
+ } else {
+ return impl::computeDispatchKeySet(ks, nonFallthroughKeys_);
+ }
+ }
+
+ template <class... Args>
+ DispatchKeySet getDispatchKeySetUnboxed(const Args&... args) const {
+ auto ks = detail::multi_dispatch_key_set(args...);
+ // Keys that are fallthrough should be skipped
+ if (requiresBitsetPerBackend_) {
+ c10::impl::LocalDispatchKeySet tls =
+ c10::impl::tls_local_dispatch_key_set();
+ auto backend_idx =
+ ((ks | tls.included_) - tls.excluded_).getBackendIndex();
+ return impl::computeDispatchKeySet(
+ ks, nonFallthroughKeysPerBackend_[backend_idx]);
+ } else {
+ return impl::computeDispatchKeySet(ks, nonFallthroughKeys_);
+ }
+ }
+
+ void setOperatorHasFallthroughForKey(DispatchKey k, bool has_fallthrough);
+
+ std::string dumpState() const;
+ void checkInvariants(const FunctionSchema& schema) const;
+
+ private:
+ static bool isDispatchType(const Type& type) {
+ // Checking isSubtypeOf on a DynamicType heap-allocates a
+ // DynamicType version of the argument if it's not a DynamicType
+ // already, and this has measurable overhead during startup.
+ #ifdef C10_MOBILE
+ struct CachedTypes {
+ DynamicTypePtr listOfTensors;
+ DynamicTypePtr listOfOptionalTensors;
+ DynamicTypePtr optionalOfTensor;
+ };
+ static const CachedTypes ct = {
+ DynamicType::create(*ListType::ofTensors()),
+ DynamicType::create(*ListType::ofOptionalTensors()),
+ DynamicType::create(*OptionalType::ofTensor())};
+ return type.isSubtypeOf(c10::TypeFactory::get<TensorType>()) ||
+ type.isSubtypeOf(ct.listOfTensors) ||
+ type.isSubtypeOf(ct.listOfOptionalTensors) ||
+ type.isSubtypeOf(ct.optionalOfTensor);
+ #else // C10_MOBILE
+ return type.isSubtypeOf(*TensorType::get()) ||
+ type.isSubtypeOf(*ListType::ofTensors()) ||
+ type.isSubtypeOf(*ListType::ofOptionalTensors()) ||
+ type.isSubtypeOf(*OptionalType::ofTensor());
+ #endif // C10_MOBILE
+ }
+ static c10::utils::bitset makeBitsetForDispatchArgs(
+ const FunctionSchema& schema) {
+ TORCH_CHECK(
+ schema.arguments().size() <= c10::utils::bitset::NUM_BITS(),
+ "The function schema has ",
+ schema.arguments().size(),
+ " arguments but this PyTorch build only supports ",
+ c10::utils::bitset::NUM_BITS());
+ c10::utils::bitset dispatch_arg_indices_reverse;
+ for (const auto index : c10::irange(schema.arguments().size())) {
+ if (isDispatchType(*schema.arguments()[index].type())) {
+ dispatch_arg_indices_reverse.set(schema.arguments().size() - 1 - index);
+ }
+ }
+ return dispatch_arg_indices_reverse;
+ }
+
+ explicit DispatchKeyExtractor(c10::utils::bitset dispatch_arg_indices_reverse)
+ : dispatch_arg_indices_reverse_(dispatch_arg_indices_reverse),
+ nonFallthroughKeys_(DispatchKeySet::FULL) {
+ for (const auto i : c10::irange(nonFallthroughKeysPerBackend_.size())) {
+ nonFallthroughKeysPerBackend_[i] = DispatchKeySet::FULL;
+ }
+ }
+
+ // this is a bitset that has ones for each argument index which has to be
+ // considered for dispatch. This avoids having to iterate over the stack
+ // to find all the tensors. The bits are stored in reverse order, i.e.
+ // dispatch_arg_indices_reverse_[i] == true, then the i-th argument from
+ // the top of the stack (i.e. the i-th last argument of the function)
+ // is relevant for dispatch.
+ // dispatch_arg_indices_reverse_ is allowed to have zero bits set; that just
+ // means you must do the fallthrough
+ c10::utils::bitset dispatch_arg_indices_reverse_;
+
+ // Set of functionality keys for which the operator does NOT have fallthrough
+ // kernel.
+ DispatchKeySet nonFallthroughKeys_;
+ // Set of functionality keys for which the operator does NOT have fallthrough
+ // kernel, defined PER BACKEND. This is only needed if we know that the
+ // operator has a different set of fallthroughs defined for some backends.
+ std::array<DispatchKeySet, num_backends> nonFallthroughKeysPerBackend_;
+ // Flag to tell us if we can use the single set of nonFallthroughKeys_ (fast
+ // path), or if we need to fall back to the slower path and check
+ // nonFallthroughKeysPerBackend_
+ bool requiresBitsetPerBackend_{false};
+ };
+
+ } // namespace c10
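
Illustrative sketch (not part of the uploaded header): for an unboxed call, the pieces above combine roughly as follows. The free function and its tensor arguments are hypothetical, and DispatchKeySet::FULL stands in for the operator-specific non-fallthrough mask that a real DispatchKeyExtractor would supply:

#include <ATen/core/dispatch/DispatchKeyExtractor.h>

c10::DispatchKey pick_dispatch_key(const at::Tensor& a, const at::Tensor& b) {
  // Union of the key sets of all dispatch-relevant arguments.
  c10::DispatchKeySet ks = c10::detail::multi_dispatch_key_set(a, b);
  // Fold in thread-local included/excluded keys and mask out fallthroughs.
  c10::DispatchKeySet masked = c10::impl::computeDispatchKeySet(
      ks, c10::DispatchKeySet(c10::DispatchKeySet::FULL));
  return masked.highestPriorityTypeId();
}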